mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
#23144: Make sure that HTMLParser.feed() passes all the data to the handlers, even when convert_charrefs is True.
This commit is contained in:
parent
527ef0792f
commit
6f2bb98966
3 changed files with 25 additions and 5 deletions
|
@ -198,7 +198,15 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
if self.convert_charrefs and not self.cdata_elem:
|
if self.convert_charrefs and not self.cdata_elem:
|
||||||
j = rawdata.find('<', i)
|
j = rawdata.find('<', i)
|
||||||
if j < 0:
|
if j < 0:
|
||||||
if not end:
|
# if we can't find the next <, either we are at the end
|
||||||
|
# or there's more text incoming. If the latter is True,
|
||||||
|
# we can't pass the text to handle_data in case we have
|
||||||
|
# a charref cut in half at end. Try to determine if
|
||||||
|
# this is the case before proceeding by looking for an
|
||||||
|
# & near the end and see if it's followed by a space or ;.
|
||||||
|
amppos = rawdata.rfind('&', max(i, n-34))
|
||||||
|
if (amppos >= 0 and
|
||||||
|
not re.compile(r'[\s;]').search(rawdata, amppos)):
|
||||||
break # wait till we get all the text
|
break # wait till we get all the text
|
||||||
j = n
|
j = n
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -72,9 +72,6 @@ class EventCollectorExtra(EventCollector):
|
||||||
|
|
||||||
class EventCollectorCharrefs(EventCollector):
|
class EventCollectorCharrefs(EventCollector):
|
||||||
|
|
||||||
def get_events(self):
|
|
||||||
return self.events
|
|
||||||
|
|
||||||
def handle_charref(self, data):
|
def handle_charref(self, data):
|
||||||
self.fail('This should never be called with convert_charrefs=True')
|
self.fail('This should never be called with convert_charrefs=True')
|
||||||
|
|
||||||
|
@ -685,6 +682,18 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
||||||
]
|
]
|
||||||
self._run_check(html, expected)
|
self._run_check(html, expected)
|
||||||
|
|
||||||
|
def test_convert_charrefs_dropped_text(self):
|
||||||
|
# #23144: make sure that all the events are triggered when
|
||||||
|
# convert_charrefs is True, even if we don't call .close()
|
||||||
|
parser = EventCollector(convert_charrefs=True)
|
||||||
|
# before the fix, the trailing ' bar & baz' data event was missing
|
||||||
|
parser.feed("foo <a>link</a> bar & baz")
|
||||||
|
self.assertEqual(
|
||||||
|
parser.get_events(),
|
||||||
|
[('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
|
||||||
|
('endtag', 'a'), ('data', ' bar & baz')]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AttributesStrictTestCase(TestCaseBase):
|
class AttributesStrictTestCase(TestCaseBase):
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
+++++++++++
|
+++++++++++
|
||||||
Python News
|
Python News
|
||||||
+++++++++++
|
+++++++++++
|
||||||
|
|
||||||
|
@ -81,6 +81,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #23144: Make sure that HTMLParser.feed() returns all the data, even
|
||||||
|
when convert_charrefs is True.
|
||||||
|
|
||||||
- Issue #16180: Exit pdb if file has syntax error, instead of trapping user
|
- Issue #16180: Exit pdb if file has syntax error, instead of trapping user
|
||||||
in an infinite loop. Patch by Xavier de Gaye.
|
in an infinite loop. Patch by Xavier de Gaye.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue