mirror of
https://github.com/python/cpython.git
synced 2025-07-24 19:54:21 +00:00
#7311: fix HTMLParser to accept non-ASCII attribute values.
This commit is contained in:
parent
104c3f1020
commit
9f1ffb2ae9
3 changed files with 20 additions and 1 deletions
|
@@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
|
|||
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
|
||||
attrfind = re.compile(
|
||||
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
|
||||
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
|
|
|
@@ -208,6 +208,23 @@ DOCTYPE html [
|
|||
("starttag", "a", [("href", "mailto:xyz@example.com")]),
|
||||
])
|
||||
|
||||
def test_attr_nonascii(self):
|
||||
# see issue 7311
|
||||
self._run_check(u"<img src=/foo/bar.png alt=\u4e2d\u6587>", [
|
||||
("starttag", "img", [("src", "/foo/bar.png"),
|
||||
("alt", u"\u4e2d\u6587")]),
|
||||
])
|
||||
self._run_check(u"<a title='\u30c6\u30b9\u30c8' "
|
||||
u"href='\u30c6\u30b9\u30c8.html'>", [
|
||||
("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
|
||||
("href", u"\u30c6\u30b9\u30c8.html")]),
|
||||
])
|
||||
self._run_check(u'<a title="\u30c6\u30b9\u30c8" '
|
||||
u'href="\u30c6\u30b9\u30c8.html">', [
|
||||
("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
|
||||
("href", u"\u30c6\u30b9\u30c8.html")]),
|
||||
])
|
||||
|
||||
def test_attr_entity_replacement(self):
|
||||
self._run_check("""<a b='&><"''>""", [
|
||||
("starttag", "a", [("b", "&><\"'")]),
|
||||
|
|
|
@@ -47,6 +47,8 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #7311: fix HTMLParser to accept non-ASCII attribute values.
|
||||
|
||||
- Issue #10963: Ensure that subprocess.communicate() never raises EPIPE.
|
||||
|
||||
- Issue #11662: Make urllib and urllib2 ignore redirections if the
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue