[3.9] gh-135661: Fix parsing attributes with whitespace around the "=" separator in HTMLParser (GH-136908) (GH-136922)
Some checks failed
Tests / Check for source changes (push) Has been cancelled
Tests / Check if the ABI has changed (push) Has been cancelled
Tests / Check if generated files are up to date (push) Has been cancelled
Tests / Windows (x86) (push) Has been cancelled
Tests / Windows (x64) (push) Has been cancelled
Tests / macOS (push) Has been cancelled
Tests / Ubuntu (push) Has been cancelled
Tests / Ubuntu SSL tests with OpenSSL (push) Has been cancelled

This fixes a regression introduced in GH-135930.
(cherry picked from commit dee6501894)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2025-07-22 11:58:28 +02:00 committed by GitHub
parent 6c97200861
commit 06fc882eac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 19 deletions

View file

@ -595,7 +595,7 @@ text
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
expected = [
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
('starttag', 'b', []),
('data', 'The '),
('starttag', 'a', [('href', 'some_url')]),
@ -751,12 +751,12 @@ class AttributesTestCase(TestCaseBase):
]
self._run_check("""<a b='v' c="v" d=v e>""", output)
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
@ -767,8 +767,8 @@ class AttributesTestCase(TestCaseBase):
("d", "\txyz\n")])])
self._run_check("""<a b='' c="">""",
[("starttag", "a", [("b", ""), ("c", "")])])
self._run_check("<a b=\t c=\n>",
[("starttag", "a", [("b", ""), ("c", "")])])
self._run_check("<a b=\tx c=\ny>",
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
self._run_check("<a b=\v c=\xa0>",
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
# Regression test for SF patch #669683.
@ -837,13 +837,17 @@ class AttributesTestCase(TestCaseBase):
)
expected = [
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
('data', 'test - bad1'), ('endtag', 'a'),
('data', 'test - bad1'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
('data', 'test - bad2'), ('endtag', 'a'),
('data', 'test - bad2'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
('data', 'test - bad3'), ('endtag', 'a'),
('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
('data', 'test - bad4'), ('endtag', 'a')
('data', 'test - bad3'),
('endtag', 'a'),
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
('data', 'test - bad4'),
('endtag', 'a'),
]
self._run_check(html, expected)