#13987: HTMLParser is now able to handle malformed start tags.

This commit is contained in:
Ezio Melotti 2012-02-15 13:19:10 +02:00
parent d2307cb48a
commit 65d36dab4d
3 changed files with 9 additions and 6 deletions

View file

@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
self.error("junk characters in start tag: %r"
% (rawdata[k:endpos][:20],))
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
# end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1
self.updatepos(i, j)
self.error("malformed start tag")
if j > i:
return j
else:
return i + 1
raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete