#13987: HTMLParser is now able to handle malformed start tags.

2025-11-25 04:34:37 +00:00 · 2012-02-15 13:19:10 +02:00 · 2012-02-15 13:19:10 +02:00 · 65d36dab4d
commit 65d36dab4d
parent d2307cb48a
3 changed files with 9 additions and 6 deletions
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
-            self.error("junk characters in start tag: %r"
-                       % (rawdata[k:endpos][:20],))
+            self.handle_data(rawdata[i:endpos])
+            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
-            self.updatepos(i, j)
-            self.error("malformed start tag")
+            if j > i:
+                return j
+            else:
+                return i + 1
        raise AssertionError("we should not get here!")

    # Internal -- parse endtag, return end or -1 if incomplete