mirror of
https://github.com/python/cpython.git
synced 2025-09-11 19:27:07 +00:00
[3.13] gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908) (GH-136918)
Some checks are pending
Tests / (push) Blocked by required conditions
Tests / Windows MSI (push) Blocked by required conditions
Tests / Change detection (push) Waiting to run
Tests / Docs (push) Blocked by required conditions
Tests / Check if the ABI has changed (push) Blocked by required conditions
Tests / Check if Autoconf files are up to date (push) Blocked by required conditions
Tests / Check if generated files are up to date (push) Blocked by required conditions
Tests / Ubuntu SSL tests with OpenSSL (push) Blocked by required conditions
Tests / WASI (push) Blocked by required conditions
Tests / Hypothesis tests on Ubuntu (push) Blocked by required conditions
Tests / Address sanitizer (push) Blocked by required conditions
Tests / CIFuzz (push) Blocked by required conditions
Tests / All required checks pass (push) Blocked by required conditions
Lint / lint (push) Waiting to run
Some checks are pending
Tests / (push) Blocked by required conditions
Tests / Windows MSI (push) Blocked by required conditions
Tests / Change detection (push) Waiting to run
Tests / Docs (push) Blocked by required conditions
Tests / Check if the ABI has changed (push) Blocked by required conditions
Tests / Check if Autoconf files are up to date (push) Blocked by required conditions
Tests / Check if generated files are up to date (push) Blocked by required conditions
Tests / Ubuntu SSL tests with OpenSSL (push) Blocked by required conditions
Tests / WASI (push) Blocked by required conditions
Tests / Hypothesis tests on Ubuntu (push) Blocked by required conditions
Tests / Address sanitizer (push) Blocked by required conditions
Tests / CIFuzz (push) Blocked by required conditions
Tests / All required checks pass (push) Blocked by required conditions
Lint / lint (push) Waiting to run
This fixes a regression introduced in GH-135930.
(cherry picked from commit dee6501894)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent 82397064d2
commit 853b5c43d0
3 changed files with 18 additions and 19 deletions
|
@ -45,7 +45,7 @@ attrfind_tolerant = re.compile(r"""
|
||||||
(
|
(
|
||||||
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
|
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
|
||||||
)
|
)
|
||||||
(= # value indicator
|
([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
|
||||||
('[^']*' # LITA-enclosed value
|
('[^']*' # LITA-enclosed value
|
||||||
|"[^"]*" # LIT-enclosed value
|
|"[^"]*" # LIT-enclosed value
|
||||||
|(?!['"])[^>\t\n\r\f ]* # bare value
|
|(?!['"])[^>\t\n\r\f ]* # bare value
|
||||||
|
@ -57,7 +57,7 @@ locatetagend = re.compile(r"""
|
||||||
[a-zA-Z][^\t\n\r\f />]* # tag name
|
[a-zA-Z][^\t\n\r\f />]* # tag name
|
||||||
[\t\n\r\f /]* # optional whitespace before attribute name
|
[\t\n\r\f /]* # optional whitespace before attribute name
|
||||||
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
|
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
|
||||||
(?:= # value indicator
|
(?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
|
||||||
(?:'[^']*' # LITA-enclosed value
|
(?:'[^']*' # LITA-enclosed value
|
||||||
|"[^"]*" # LIT-enclosed value
|
|"[^"]*" # LIT-enclosed value
|
||||||
|(?!['"])[^>\t\n\r\f ]* # bare value
|
|(?!['"])[^>\t\n\r\f ]* # bare value
|
||||||
|
|
|
@ -623,7 +623,7 @@ text
|
||||||
|
|
||||||
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
|
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
|
||||||
expected = [
|
expected = [
|
||||||
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
|
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
|
||||||
('starttag', 'b', []),
|
('starttag', 'b', []),
|
||||||
('data', 'The '),
|
('data', 'The '),
|
||||||
('starttag', 'a', [('href', 'some_url')]),
|
('starttag', 'a', [('href', 'some_url')]),
|
||||||
|
@ -813,12 +813,12 @@ class AttributesTestCase(TestCaseBase):
|
||||||
]
|
]
|
||||||
self._run_check("""<a b='v' c="v" d=v e>""", output)
|
self._run_check("""<a b='v' c="v" d=v e>""", output)
|
||||||
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
|
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
|
||||||
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
|
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
|
||||||
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
|
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
|
||||||
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
|
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
|
||||||
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
|
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
|
||||||
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
|
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
|
||||||
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
|
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
|
||||||
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
|
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
|
||||||
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
|
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
|
||||||
|
|
||||||
|
@ -829,8 +829,8 @@ class AttributesTestCase(TestCaseBase):
|
||||||
("d", "\txyz\n")])])
|
("d", "\txyz\n")])])
|
||||||
self._run_check("""<a b='' c="">""",
|
self._run_check("""<a b='' c="">""",
|
||||||
[("starttag", "a", [("b", ""), ("c", "")])])
|
[("starttag", "a", [("b", ""), ("c", "")])])
|
||||||
self._run_check("<a b=\t c=\n>",
|
self._run_check("<a b=\tx c=\ny>",
|
||||||
[("starttag", "a", [("b", ""), ("c", "")])])
|
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
|
||||||
self._run_check("<a b=\v c=\xa0>",
|
self._run_check("<a b=\v c=\xa0>",
|
||||||
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
|
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
|
||||||
# Regression test for SF patch #669683.
|
# Regression test for SF patch #669683.
|
||||||
|
@ -899,13 +899,17 @@ class AttributesTestCase(TestCaseBase):
|
||||||
)
|
)
|
||||||
expected = [
|
expected = [
|
||||||
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
|
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
|
||||||
('data', 'test - bad1'), ('endtag', 'a'),
|
('data', 'test - bad1'),
|
||||||
|
('endtag', 'a'),
|
||||||
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
|
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
|
||||||
('data', 'test - bad2'), ('endtag', 'a'),
|
('data', 'test - bad2'),
|
||||||
|
('endtag', 'a'),
|
||||||
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
|
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
|
||||||
('data', 'test - bad3'), ('endtag', 'a'),
|
('data', 'test - bad3'),
|
||||||
('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]),
|
('endtag', 'a'),
|
||||||
('data', 'test - bad4'), ('endtag', 'a')
|
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
|
||||||
|
('data', 'test - bad4'),
|
||||||
|
('endtag', 'a'),
|
||||||
]
|
]
|
||||||
self._run_check(html, expected)
|
self._run_check(html, expected)
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,3 @@ according to the HTML5 standard.
|
||||||
|
|
||||||
* Multiple ``=`` between attribute name and value are no longer collapsed.
|
* Multiple ``=`` between attribute name and value are no longer collapsed.
|
||||||
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
|
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
|
||||||
|
|
||||||
* Whitespaces between the ``=`` separator and attribute name or value are no
|
|
||||||
longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
|
|
||||||
"=bar", both with value None; ``<a foo= bar>`` produces two attributes:
|
|
||||||
"foo" with value "" and "bar" with value None.
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue