mirror of
https://github.com/python/cpython.git
synced 2025-07-09 20:35:26 +00:00
#13358: HTMLParser now calls handle_data only once for each CDATA.
This commit is contained in:
parent
8008f2aba0
commit
15cb489234
3 changed files with 26 additions and 3 deletions
|
@@ -14,7 +14,6 @@ import re
|
|||
# Regular expressions used for parsing
|
||||
|
||||
interesting_normal = re.compile('[&<]')
|
||||
interesting_cdata = re.compile(r'<(/|\Z)')
|
||||
incomplete = re.compile('&[a-zA-Z#]')
|
||||
|
||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||
|
@@ -149,8 +148,8 @@ class HTMLParser(_markupbase.ParserBase):
|
|||
return self.__starttag_text
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.interesting = interesting_cdata
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
def clear_cdata_mode(self):
|
||||
self.interesting = interesting_normal
|
||||
|
@@ -168,6 +167,8 @@ class HTMLParser(_markupbase.ParserBase):
|
|||
if match:
|
||||
j = match.start()
|
||||
else:
|
||||
if self.cdata_elem:
|
||||
break
|
||||
j = n
|
||||
if i < j: self.handle_data(rawdata[i:j])
|
||||
i = self.updatepos(i, j)
|
||||
|
@@ -250,7 +251,7 @@ class HTMLParser(_markupbase.ParserBase):
|
|||
else:
|
||||
assert 0, "interesting.search() lied"
|
||||
# end while
|
||||
if end and i < n:
|
||||
if end and i < n and not self.cdata_elem:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = self.updatepos(i, n)
|
||||
self.rawdata = rawdata[i:]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue