#13273: merge with 3.2.

2025-08-22 01:35:16 +00:00 · 2011-10-28 13:23:57 +03:00 · 2011-10-28 13:23:57 +03:00 · 91ec2e8a31
commit 91ec2e8a31
parent 455036fd1f f50ffa94ab
3 changed files with 38 additions and 3 deletions
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@ -30,7 +30,7 @@ attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
-    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
 locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
-                m = attrfind_tolerant.search(rawdata, k)
+                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@ -373,6 +373,39 @@ class HTMLParserTolerantTestCase(TestCaseBase):
                                [('action', 'bogus|&#()value')])],
                        collector = self.collector)
    def test_issue13273(self):
        # Issue #13273: a start tag whose empty attribute value is followed
        # by a run of spaces before '>' must not disturb the events that
        # are reported for the rest of the document.
        markup = (
            '<div style=""    >'
            '<b>The <a href="some_url">rain</a> '
            '<br /> in <span>Spain</span></b></div>'
        )
        events = [
            # opening <div> carrying the empty style attribute
            ('starttag', 'div', [('style', '')]),
            ('starttag', 'b', []),
            ('data', 'The '),
            # the anchor and its text
            ('starttag', 'a', [('href', 'some_url')]),
            ('data', 'rain'),
            ('endtag', 'a'),
            ('data', ' '),
            # self-closing <br /> is reported as a single startend event
            ('startendtag', 'br', []),
            ('data', ' in '),
            ('starttag', 'span', []),
            ('data', 'Spain'),
            ('endtag', 'span'),
            # closing tags, innermost first
            ('endtag', 'b'),
            ('endtag', 'div'),
        ]
        self._run_check(markup, events, collector=self.collector)
    def test_issue13273_2(self):
        # Issue #13273, second case: a stray comma between two attributes
        # (and spaces around '=') must still yield both attributes, and the
        # tags following the <div> must still be reported.
        markup = (
            '<div style="", foo = "bar" >'
            '<b>The <a href="some_url">rain</a>'
        )
        events = [
            # both attributes survive the comma separator
            ('starttag', 'div', [('style', ''), ('foo', 'bar')]),
            ('starttag', 'b', []),
            ('data', 'The '),
            ('starttag', 'a', [('href', 'some_url')]),
            ('data', 'rain'),
            ('endtag', 'a'),
        ]
        self._run_check(markup, events, collector=self.collector)
    def test_unescape_function(self):
        p = html.parser.HTMLParser()
        self.assertEqual(p.unescape('&#bad;'),'&#bad;')
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -341,6 +341,9 @@ Core and Builtins
 Library
 -------
 - Issue #13273: fix a bug that prevented HTMLParser from properly detecting
  some tags when strict=False.
 - Issue #11183: Add finer-grained exceptions to the ssl module, so that
  you don't have to inspect the exception's attributes in the common case.