mirror of
https://github.com/python/cpython.git
synced 2025-10-07 15:42:02 +00:00
Fix Issue10759 - html.parser.unescape() fails on HTML entities with incorrect syntax
This commit is contained in:
parent
3b4499c5c7
commit
164540fee1
2 changed files with 15 additions and 7 deletions
|
@ -434,6 +434,7 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
return s
|
return s
|
||||||
def replaceEntities(s):
|
def replaceEntities(s):
|
||||||
s = s.groups()[0]
|
s = s.groups()[0]
|
||||||
|
try:
|
||||||
if s[0] == "#":
|
if s[0] == "#":
|
||||||
s = s[1:]
|
s = s[1:]
|
||||||
if s[0] in ['x','X']:
|
if s[0] in ['x','X']:
|
||||||
|
@ -441,6 +442,8 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
else:
|
else:
|
||||||
c = int(s)
|
c = int(s)
|
||||||
return chr(c)
|
return chr(c)
|
||||||
|
except ValueError:
|
||||||
|
return '&#'+ s +';'
|
||||||
else:
|
else:
|
||||||
# Cannot use name2codepoint directly, because HTMLParser
|
# Cannot use name2codepoint directly, because HTMLParser
|
||||||
# supports apos, which is not part of HTML 4
|
# supports apos, which is not part of HTML 4
|
||||||
|
|
|
@ -356,6 +356,11 @@ class HTMLParserTolerantTestCase(TestCaseBase):
|
||||||
[('action', 'bogus|&#()value')])],
|
[('action', 'bogus|&#()value')])],
|
||||||
collector = self.collector)
|
collector = self.collector)
|
||||||
|
|
||||||
|
def test_unescape_function(self):
|
||||||
|
p = html.parser.HTMLParser()
|
||||||
|
self.assertEqual(p.unescape('&#bad;'),'&#bad;')
|
||||||
|
self.assertEqual(p.unescape('&#0038;'),'&')
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase)
|
support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue