#7311: fix HTMLParser to accept non-ASCII attribute values.

This commit is contained in:
Ezio Melotti 2011-04-05 20:40:52 +03:00
parent 104c3f1020
commit 9f1ffb2ae9
3 changed files with 20 additions and 1 deletions

View file

@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name