#13987: HTMLParser is now able to handle EOFs in the middle of a construct.

This commit is contained in:
Ezio Melotti 2012-02-15 12:44:23 +02:00
parent fd7e4964bb
commit d2307cb48a
3 changed files with 21 additions and 11 deletions

View file

@@ -170,9 +170,16 @@ class HTMLParser(markupbase.ParserBase):
else: else:
break break
if k < 0: if k < 0:
if end: if not end:
self.error("EOF in middle of construct") break
break k = rawdata.find('>', i + 1)
if k < 0:
k = rawdata.find('<', i + 1)
if k < 0:
k = i + 1
else:
k += 1
self.handle_data(rawdata[i:k])
i = self.updatepos(i, k) i = self.updatepos(i, k)
elif startswith("&#", i): elif startswith("&#", i):
match = charref.match(rawdata, i) match = charref.match(rawdata, i)

View file

@@ -204,16 +204,16 @@ text
def test_starttag_junk_chars(self): def test_starttag_junk_chars(self):
self._run_check("</>", []) self._run_check("</>", [])
self._run_check("</$>", [('comment', '$')]) self._run_check("</$>", [('comment', '$')])
self._parse_error("</") self._run_check("</", [('data', '</')])
self._parse_error("</a") self._run_check("</a", [('data', '</a')])
self._parse_error("<a<a>") self._parse_error("<a<a>")
self._run_check("</a<a>", [('endtag', 'a<a')]) self._run_check("</a<a>", [('endtag', 'a<a')])
self._parse_error("<!") self._run_check("<!", [('data', '<!')])
self._parse_error("<a") self._run_check("<a", [('data', '<a')])
self._parse_error("<a foo='bar'") self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
self._parse_error("<a foo='bar") self._run_check("<a foo='bar", [('data', "<a foo='bar")])
self._parse_error("<a foo='>'") self._run_check("<a foo='>'", [('data', "<a foo='>'")])
self._parse_error("<a foo='>") self._run_check("<a foo='>", [('data', "<a foo='>")])
def test_valid_doctypes(self): def test_valid_doctypes(self):
# from http://www.w3.org/QA/2002/04/valid-dtd-list.html # from http://www.w3.org/QA/2002/04/valid-dtd-list.html

View file

@@ -93,6 +93,9 @@ Core and Builtins
Library Library
------- -------
- Issue #13987: HTMLParser is now able to handle EOFs in the middle of a
construct.
- Issue #13015: Fix a possible reference leak in defaultdict.__repr__. - Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
Patch by Suman Saha. Patch by Suman Saha.