mirror of
https://github.com/python/cpython.git
synced 2025-08-22 09:45:06 +00:00
#13987: HTMLParser is now able to handle EOFs in the middle of a construct.
This commit is contained in:
parent
fd7e4964bb
commit
d2307cb48a
3 changed files with 21 additions and 11 deletions
|
@ -170,9 +170,16 @@ class HTMLParser(markupbase.ParserBase):
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if k < 0:
|
if k < 0:
|
||||||
if end:
|
if not end:
|
||||||
self.error("EOF in middle of construct")
|
break
|
||||||
break
|
k = rawdata.find('>', i + 1)
|
||||||
|
if k < 0:
|
||||||
|
k = rawdata.find('<', i + 1)
|
||||||
|
if k < 0:
|
||||||
|
k = i + 1
|
||||||
|
else:
|
||||||
|
k += 1
|
||||||
|
self.handle_data(rawdata[i:k])
|
||||||
i = self.updatepos(i, k)
|
i = self.updatepos(i, k)
|
||||||
elif startswith("&#", i):
|
elif startswith("&#", i):
|
||||||
match = charref.match(rawdata, i)
|
match = charref.match(rawdata, i)
|
||||||
|
|
|
@ -204,16 +204,16 @@ text
|
||||||
def test_starttag_junk_chars(self):
|
def test_starttag_junk_chars(self):
|
||||||
self._run_check("</>", [])
|
self._run_check("</>", [])
|
||||||
self._run_check("</$>", [('comment', '$')])
|
self._run_check("</$>", [('comment', '$')])
|
||||||
self._parse_error("</")
|
self._run_check("</", [('data', '</')])
|
||||||
self._parse_error("</a")
|
self._run_check("</a", [('data', '</a')])
|
||||||
self._parse_error("<a<a>")
|
self._parse_error("<a<a>")
|
||||||
self._run_check("</a<a>", [('endtag', 'a<a')])
|
self._run_check("</a<a>", [('endtag', 'a<a')])
|
||||||
self._parse_error("<!")
|
self._run_check("<!", [('data', '<!')])
|
||||||
self._parse_error("<a")
|
self._run_check("<a", [('data', '<a')])
|
||||||
self._parse_error("<a foo='bar'")
|
self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
|
||||||
self._parse_error("<a foo='bar")
|
self._run_check("<a foo='bar", [('data', "<a foo='bar")])
|
||||||
self._parse_error("<a foo='>'")
|
self._run_check("<a foo='>'", [('data', "<a foo='>'")])
|
||||||
self._parse_error("<a foo='>")
|
self._run_check("<a foo='>", [('data', "<a foo='>")])
|
||||||
|
|
||||||
def test_valid_doctypes(self):
|
def test_valid_doctypes(self):
|
||||||
# from http://www.w3.org/QA/2002/04/valid-dtd-list.html
|
# from http://www.w3.org/QA/2002/04/valid-dtd-list.html
|
||||||
|
|
|
@ -93,6 +93,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13987: HTMLParser is now able to handle EOFs in the middle of a
|
||||||
|
construct.
|
||||||
|
|
||||||
- Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
|
- Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
|
||||||
Patch by Suman Saha.
|
Patch by Suman Saha.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue