mirror of
https://github.com/python/cpython.git
synced 2025-09-17 22:20:23 +00:00
Fix Issue10759 - HTMLParser.unescape() to handle malformed charrefs.
This commit is contained in:
parent
06fdbedf81
commit
3f60f09eb2
2 changed files with 15 additions and 7 deletions
|
@ -367,13 +367,16 @@ class HTMLParser(markupbase.ParserBase):
|
|||
return s
|
||||
def replaceEntities(s):
|
||||
s = s.groups()[0]
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:], 16)
|
||||
else:
|
||||
c = int(s)
|
||||
return unichr(c)
|
||||
try:
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:], 16)
|
||||
else:
|
||||
c = int(s)
|
||||
return unichr(c)
|
||||
except ValueError:
|
||||
return '&#'+s+';'
|
||||
else:
|
||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
||||
# which is not part of HTML 4
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue