mirror of
https://github.com/python/cpython.git
synced 2025-11-24 12:20:42 +00:00
[3.14] gh-137836: Support more RAWTEXT and PLAINTEXT elements in HTMLParser (GH-137837) (GH-140841)
* the "plaintext" element
* the RAWTEXT elements "xmp", "iframe", "noembed" and "noframes"
* optionally RAWTEXT (if scripting=True) element "noscript"
(cherry picked from commit a17c57eee5)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
d0c78a458b
commit
89818a5939
4 changed files with 163 additions and 114 deletions
|
|
@ -15,14 +15,18 @@
|
||||||
This module defines a class :class:`HTMLParser` which serves as the basis for
|
This module defines a class :class:`HTMLParser` which serves as the basis for
|
||||||
parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
|
parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
|
||||||
|
|
||||||
.. class:: HTMLParser(*, convert_charrefs=True)
|
.. class:: HTMLParser(*, convert_charrefs=True, scripting=False)
|
||||||
|
|
||||||
Create a parser instance able to parse invalid markup.
|
Create a parser instance able to parse invalid markup.
|
||||||
|
|
||||||
If *convert_charrefs* is ``True`` (the default), all character
|
If *convert_charrefs* is true (the default), all character
|
||||||
references (except the ones in ``script``/``style`` elements) are
|
references (except the ones in elements like ``script`` and ``style``) are
|
||||||
automatically converted to the corresponding Unicode characters.
|
automatically converted to the corresponding Unicode characters.
|
||||||
|
|
||||||
|
If *scripting* is false (the default), the content of the ``noscript``
|
||||||
|
element is parsed normally; if it's true, it's returned as is without
|
||||||
|
being parsed.
|
||||||
|
|
||||||
An :class:`.HTMLParser` instance is fed HTML data and calls handler methods
|
An :class:`.HTMLParser` instance is fed HTML data and calls handler methods
|
||||||
when start tags, end tags, text, comments, and other markup elements are
|
when start tags, end tags, text, comments, and other markup elements are
|
||||||
encountered. The user should subclass :class:`.HTMLParser` and override its
|
encountered. The user should subclass :class:`.HTMLParser` and override its
|
||||||
|
|
@ -37,6 +41,9 @@ parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
|
||||||
.. versionchanged:: 3.5
|
.. versionchanged:: 3.5
|
||||||
The default value for argument *convert_charrefs* is now ``True``.
|
The default value for argument *convert_charrefs* is now ``True``.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.14.1
|
||||||
|
Added the *scripting* parameter.
|
||||||
|
|
||||||
|
|
||||||
Example HTML Parser Application
|
Example HTML Parser Application
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
@ -161,15 +168,15 @@ implementations do nothing (except for :meth:`~HTMLParser.handle_startendtag`):
|
||||||
.. method:: HTMLParser.handle_data(data)
|
.. method:: HTMLParser.handle_data(data)
|
||||||
|
|
||||||
This method is called to process arbitrary data (e.g. text nodes and the
|
This method is called to process arbitrary data (e.g. text nodes and the
|
||||||
content of ``<script>...</script>`` and ``<style>...</style>``).
|
content of elements like ``script`` and ``style``).
|
||||||
|
|
||||||
|
|
||||||
.. method:: HTMLParser.handle_entityref(name)
|
.. method:: HTMLParser.handle_entityref(name)
|
||||||
|
|
||||||
This method is called to process a named character reference of the form
|
This method is called to process a named character reference of the form
|
||||||
``&name;`` (e.g. ``&gt;``), where *name* is a general entity reference
|
``&name;`` (e.g. ``&gt;``), where *name* is a general entity reference
|
||||||
(e.g. ``'gt'``). This method is never called if *convert_charrefs* is
|
(e.g. ``'gt'``).
|
||||||
``True``.
|
This method is only called if *convert_charrefs* is false.
|
||||||
|
|
||||||
|
|
||||||
.. method:: HTMLParser.handle_charref(name)
|
.. method:: HTMLParser.handle_charref(name)
|
||||||
|
|
@ -177,8 +184,8 @@ implementations do nothing (except for :meth:`~HTMLParser.handle_startendtag`):
|
||||||
This method is called to process decimal and hexadecimal numeric character
|
This method is called to process decimal and hexadecimal numeric character
|
||||||
references of the form :samp:`&#{NNN};` and :samp:`&#x{NNN};`. For example, the decimal
|
references of the form :samp:`&#{NNN};` and :samp:`&#x{NNN};`. For example, the decimal
|
||||||
equivalent for ``&gt;`` is ``&#62;``, whereas the hexadecimal is ``&#x3E;``;
|
equivalent for ``&gt;`` is ``&#62;``, whereas the hexadecimal is ``&#x3E;``;
|
||||||
in this case the method will receive ``'62'`` or ``'x3E'``. This method
|
in this case the method will receive ``'62'`` or ``'x3E'``.
|
||||||
is never called if *convert_charrefs* is ``True``.
|
This method is only called if *convert_charrefs* is false.
|
||||||
|
|
||||||
|
|
||||||
.. method:: HTMLParser.handle_comment(data)
|
.. method:: HTMLParser.handle_comment(data)
|
||||||
|
|
@ -292,8 +299,8 @@ Parsing an element with a few attributes and a title:
|
||||||
Data : Python
|
Data : Python
|
||||||
End tag : h1
|
End tag : h1
|
||||||
|
|
||||||
The content of ``script`` and ``style`` elements is returned as is, without
|
The content of elements like ``script`` and ``style`` is returned as is,
|
||||||
further parsing:
|
without further parsing:
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
|
|
@ -304,10 +311,10 @@ further parsing:
|
||||||
End tag : style
|
End tag : style
|
||||||
|
|
||||||
>>> parser.feed('<script type="text/javascript">'
|
>>> parser.feed('<script type="text/javascript">'
|
||||||
... 'alert("<strong>hello!</strong>");</script>')
|
... 'alert("<strong>hello! &#9786;</strong>");</script>')
|
||||||
Start tag: script
|
Start tag: script
|
||||||
attr: ('type', 'text/javascript')
|
attr: ('type', 'text/javascript')
|
||||||
Data : alert("<strong>hello!</strong>");
|
Data : alert("<strong>hello! &#9786;</strong>");
|
||||||
End tag : script
|
End tag : script
|
||||||
|
|
||||||
Parsing comments:
|
Parsing comments:
|
||||||
|
|
@ -336,7 +343,7 @@ correct char (note: these 3 references are all equivalent to ``'>'``):
|
||||||
|
|
||||||
Feeding incomplete chunks to :meth:`~HTMLParser.feed` works, but
|
Feeding incomplete chunks to :meth:`~HTMLParser.feed` works, but
|
||||||
:meth:`~HTMLParser.handle_data` might be called more than once
|
:meth:`~HTMLParser.handle_data` might be called more than once
|
||||||
(unless *convert_charrefs* is set to ``True``):
|
if *convert_charrefs* is false:
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -127,17 +127,25 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
argument.
|
argument.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
# See the HTML5 specs section "13.4 Parsing HTML fragments".
|
||||||
|
# https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
|
||||||
|
# CDATA_CONTENT_ELEMENTS are parsed in RAWTEXT mode
|
||||||
|
CDATA_CONTENT_ELEMENTS = ("script", "style", "xmp", "iframe", "noembed", "noframes")
|
||||||
RCDATA_CONTENT_ELEMENTS = ("textarea", "title")
|
RCDATA_CONTENT_ELEMENTS = ("textarea", "title")
|
||||||
|
|
||||||
def __init__(self, *, convert_charrefs=True):
|
def __init__(self, *, convert_charrefs=True, scripting=False):
|
||||||
"""Initialize and reset this instance.
|
"""Initialize and reset this instance.
|
||||||
|
|
||||||
If convert_charrefs is True (the default), all character references
|
If convert_charrefs is true (the default), all character references
|
||||||
are automatically converted to the corresponding Unicode characters.
|
are automatically converted to the corresponding Unicode characters.
|
||||||
|
|
||||||
|
If *scripting* is false (the default), the content of the
|
||||||
|
``noscript`` element is parsed normally; if it's true,
|
||||||
|
it's returned as is without being parsed.
|
||||||
"""
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.convert_charrefs = convert_charrefs
|
self.convert_charrefs = convert_charrefs
|
||||||
|
self.scripting = scripting
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
|
|
@ -172,7 +180,9 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
def set_cdata_mode(self, elem, *, escapable=False):
|
def set_cdata_mode(self, elem, *, escapable=False):
|
||||||
self.cdata_elem = elem.lower()
|
self.cdata_elem = elem.lower()
|
||||||
self._escapable = escapable
|
self._escapable = escapable
|
||||||
if escapable and not self.convert_charrefs:
|
if self.cdata_elem == 'plaintext':
|
||||||
|
self.interesting = re.compile(r'\z')
|
||||||
|
elif escapable and not self.convert_charrefs:
|
||||||
self.interesting = re.compile(r'&|</%s(?=[\t\n\r\f />])' % self.cdata_elem,
|
self.interesting = re.compile(r'&|</%s(?=[\t\n\r\f />])' % self.cdata_elem,
|
||||||
re.IGNORECASE|re.ASCII)
|
re.IGNORECASE|re.ASCII)
|
||||||
else:
|
else:
|
||||||
|
|
@ -444,8 +454,10 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
self.handle_startendtag(tag, attrs)
|
self.handle_startendtag(tag, attrs)
|
||||||
else:
|
else:
|
||||||
self.handle_starttag(tag, attrs)
|
self.handle_starttag(tag, attrs)
|
||||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
if (tag in self.CDATA_CONTENT_ELEMENTS or
|
||||||
self.set_cdata_mode(tag)
|
(self.scripting and tag == "noscript") or
|
||||||
|
tag == "plaintext"):
|
||||||
|
self.set_cdata_mode(tag, escapable=False)
|
||||||
elif tag in self.RCDATA_CONTENT_ELEMENTS:
|
elif tag in self.RCDATA_CONTENT_ELEMENTS:
|
||||||
self.set_cdata_mode(tag, escapable=True)
|
self.set_cdata_mode(tag, escapable=True)
|
||||||
return endpos
|
return endpos
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,18 @@ from unittest.mock import patch
|
||||||
from test import support
|
from test import support
|
||||||
|
|
||||||
|
|
||||||
|
SAMPLE_RCDATA = (
|
||||||
|
'<!-- not a comment -->'
|
||||||
|
"<not a='start tag'>"
|
||||||
|
'<![CDATA[not a cdata]]>'
|
||||||
|
'<!not a bogus comment>'
|
||||||
|
'</not a bogus comment>'
|
||||||
|
'\u2603'
|
||||||
|
)
|
||||||
|
|
||||||
|
SAMPLE_RAWTEXT = SAMPLE_RCDATA + '&amp;&#9786;'
|
||||||
|
|
||||||
|
|
||||||
class EventCollector(html.parser.HTMLParser):
|
class EventCollector(html.parser.HTMLParser):
|
||||||
|
|
||||||
def __init__(self, *args, autocdata=False, **kw):
|
def __init__(self, *args, autocdata=False, **kw):
|
||||||
|
|
@ -293,30 +305,20 @@ text
|
||||||
'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
|
'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
|
||||||
'\n<!-- //\nvar foo = 3.14;\n// -->\n',
|
'\n<!-- //\nvar foo = 3.14;\n// -->\n',
|
||||||
'<!-- \u2603 -->',
|
'<!-- \u2603 -->',
|
||||||
'foo = "</ script>"',
|
|
||||||
'foo = "</scripture>"',
|
|
||||||
'foo = "</script\v>"',
|
|
||||||
'foo = "</script\xa0>"',
|
|
||||||
'foo = "</ſcript>"',
|
|
||||||
'foo = "</scrıpt>"',
|
|
||||||
])
|
])
|
||||||
def test_script_content(self, content):
|
def test_script_content(self, content):
|
||||||
s = f'<script>{content}</script>'
|
s = f'<script>{content}</script>'
|
||||||
self._run_check(s, [("starttag", "script", []),
|
self._run_check(s, [
|
||||||
|
("starttag", "script", []),
|
||||||
("data", content),
|
("data", content),
|
||||||
("endtag", "script")])
|
("endtag", "script"),
|
||||||
|
])
|
||||||
|
|
||||||
@support.subTests('content', [
|
@support.subTests('content', [
|
||||||
'a::before { content: "<!-- not a comment -->"; }',
|
'a::before { content: "<!-- not a comment -->"; }',
|
||||||
'a::before { content: "&not-an-entity-ref;"; }',
|
'a::before { content: "&not-an-entity-ref;"; }',
|
||||||
'a::before { content: "<not a=\'start tag\'>"; }',
|
'a::before { content: "<not a=\'start tag\'>"; }',
|
||||||
'a::before { content: "\u2603"; }',
|
'a::before { content: "\u2603"; }',
|
||||||
'a::before { content: "< /style>"; }',
|
|
||||||
'a::before { content: "</ style>"; }',
|
|
||||||
'a::before { content: "</styled>"; }',
|
|
||||||
'a::before { content: "</style\v>"; }',
|
|
||||||
'a::before { content: "</style\xa0>"; }',
|
|
||||||
'a::before { content: "</ſtyle>"; }',
|
|
||||||
])
|
])
|
||||||
def test_style_content(self, content):
|
def test_style_content(self, content):
|
||||||
s = f'<style>{content}</style>'
|
s = f'<style>{content}</style>'
|
||||||
|
|
@ -324,47 +326,59 @@ text
|
||||||
("data", content),
|
("data", content),
|
||||||
("endtag", "style")])
|
("endtag", "style")])
|
||||||
|
|
||||||
@support.subTests('content', [
|
@support.subTests('tag', ['title', 'textarea'])
|
||||||
'<!-- not a comment -->',
|
def test_rcdata_content(self, tag):
|
||||||
"<not a='start tag'>",
|
source = f"<{tag}>{SAMPLE_RCDATA}</{tag}>"
|
||||||
'<![CDATA[not a cdata]]>',
|
|
||||||
'<!not a bogus comment>',
|
|
||||||
'</not a bogus comment>',
|
|
||||||
'\u2603',
|
|
||||||
'< /title>',
|
|
||||||
'</ title>',
|
|
||||||
'</titled>',
|
|
||||||
'</title\v>',
|
|
||||||
'</title\xa0>',
|
|
||||||
'</tıtle>',
|
|
||||||
])
|
|
||||||
def test_title_content(self, content):
|
|
||||||
source = f"<title>{content}</title>"
|
|
||||||
self._run_check(source, [
|
self._run_check(source, [
|
||||||
("starttag", "title", []),
|
("starttag", tag, []),
|
||||||
("data", content),
|
("data", SAMPLE_RCDATA),
|
||||||
("endtag", "title"),
|
("endtag", tag),
|
||||||
|
])
|
||||||
|
source = f"<{tag}>&amp;</{tag}>"
|
||||||
|
self._run_check(source, [
|
||||||
|
("starttag", tag, []),
|
||||||
|
('entityref', 'amp'),
|
||||||
|
("endtag", tag),
|
||||||
])
|
])
|
||||||
|
|
||||||
@support.subTests('content', [
|
@support.subTests('tag',
|
||||||
'<!-- not a comment -->',
|
['style', 'xmp', 'iframe', 'noembed', 'noframes', 'script'])
|
||||||
"<not a='start tag'>",
|
def test_rawtext_content(self, tag):
|
||||||
'<![CDATA[not a cdata]]>',
|
source = f"<{tag}>{SAMPLE_RAWTEXT}</{tag}>"
|
||||||
'<!not a bogus comment>',
|
|
||||||
'</not a bogus comment>',
|
|
||||||
'\u2603',
|
|
||||||
'< /textarea>',
|
|
||||||
'</ textarea>',
|
|
||||||
'</textareable>',
|
|
||||||
'</textarea\v>',
|
|
||||||
'</textarea\xa0>',
|
|
||||||
])
|
|
||||||
def test_textarea_content(self, content):
|
|
||||||
source = f"<textarea>{content}</textarea>"
|
|
||||||
self._run_check(source, [
|
self._run_check(source, [
|
||||||
("starttag", "textarea", []),
|
("starttag", tag, []),
|
||||||
|
("data", SAMPLE_RAWTEXT),
|
||||||
|
("endtag", tag),
|
||||||
|
])
|
||||||
|
|
||||||
|
def test_noscript_content(self):
|
||||||
|
source = f"<noscript>{SAMPLE_RAWTEXT}</noscript>"
|
||||||
|
# scripting=False -- normal mode
|
||||||
|
self._run_check(source, [
|
||||||
|
('starttag', 'noscript', []),
|
||||||
|
('comment', ' not a comment '),
|
||||||
|
('starttag', 'not', [('a', 'start tag')]),
|
||||||
|
('unknown decl', 'CDATA[not a cdata'),
|
||||||
|
('comment', 'not a bogus comment'),
|
||||||
|
('endtag', 'not'),
|
||||||
|
('data', '☃'),
|
||||||
|
('entityref', 'amp'),
|
||||||
|
('charref', '9786'),
|
||||||
|
('endtag', 'noscript'),
|
||||||
|
])
|
||||||
|
# scripting=True -- RAWTEXT mode
|
||||||
|
self._run_check(source, [
|
||||||
|
("starttag", "noscript", []),
|
||||||
|
("data", SAMPLE_RAWTEXT),
|
||||||
|
("endtag", "noscript"),
|
||||||
|
], collector=EventCollector(scripting=True))
|
||||||
|
|
||||||
|
def test_plaintext_content(self):
|
||||||
|
content = SAMPLE_RAWTEXT + '</plaintext>' # not closing
|
||||||
|
source = f"<plaintext>{content}"
|
||||||
|
self._run_check(source, [
|
||||||
|
("starttag", "plaintext", []),
|
||||||
("data", content),
|
("data", content),
|
||||||
("endtag", "textarea"),
|
|
||||||
])
|
])
|
||||||
|
|
||||||
@support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n',
|
@support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n',
|
||||||
|
|
@ -381,52 +395,65 @@ text
|
||||||
("endtag", "script")],
|
("endtag", "script")],
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=False))
|
collector=EventCollectorNoNormalize(convert_charrefs=False))
|
||||||
|
|
||||||
@support.subTests('endtag', ['style', 'STYLE', 'style ', 'style\n',
|
@support.subTests('tag', [
|
||||||
'style/', 'style foo=bar', 'style foo=">"'])
|
'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes',
|
||||||
def test_style_closing_tag(self, endtag):
|
'textarea', 'title', 'noscript',
|
||||||
content = """
|
])
|
||||||
b::before { content: "<!-- not a comment -->"; }
|
def test_closing_tag(self, tag):
|
||||||
p::before { content: "&not-an-entity-ref;"; }
|
for endtag in [tag, tag.upper(), f'{tag} ', f'{tag}\n',
|
||||||
a::before { content: "<i>"; }
|
f'{tag}/', f'{tag} foo=bar', f'{tag} foo=">"']:
|
||||||
a::after { content: "</i>"; }
|
content = "<!-- not a comment --><i>Spam</i>"
|
||||||
"""
|
s = f'<{tag.upper()}>{content}</{endtag}>'
|
||||||
s = f'<StyLE>{content}</{endtag}>'
|
self._run_check(s, [
|
||||||
self._run_check(s, [("starttag", "style", []),
|
("starttag", tag, []),
|
||||||
|
('data', content),
|
||||||
|
("endtag", tag),
|
||||||
|
], collector=EventCollectorNoNormalize(convert_charrefs=False, scripting=True))
|
||||||
|
|
||||||
|
@support.subTests('tag', [
|
||||||
|
'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes',
|
||||||
|
'textarea', 'title', 'noscript',
|
||||||
|
])
|
||||||
|
def test_invalid_closing_tag(self, tag):
|
||||||
|
content = (
|
||||||
|
f'< /{tag}>'
|
||||||
|
f'</ {tag}>'
|
||||||
|
f'</{tag}x>'
|
||||||
|
f'</{tag}\v>'
|
||||||
|
f'</{tag}\xa0>'
|
||||||
|
)
|
||||||
|
source = f"<{tag}>{content}</{tag}>"
|
||||||
|
self._run_check(source, [
|
||||||
|
("starttag", tag, []),
|
||||||
("data", content),
|
("data", content),
|
||||||
("endtag", "style")],
|
("endtag", tag),
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=False))
|
], collector=EventCollector(convert_charrefs=False, scripting=True))
|
||||||
|
|
||||||
@support.subTests('endtag', ['title', 'TITLE', 'title ', 'title\n',
|
@support.subTests('tag,endtag', [
|
||||||
'title/', 'title foo=bar', 'title foo=">"'])
|
('title', 'tıtle'),
|
||||||
def test_title_closing_tag(self, endtag):
|
('style', 'ſtyle'),
|
||||||
content = "<!-- not a comment --><i>Egg &amp; Spam</i>"
|
('style', 'ﬅyle'),
|
||||||
s = f'<TitLe>{content}</{endtag}>'
|
('style', 'ﬆyle'),
|
||||||
self._run_check(s, [("starttag", "title", []),
|
('iframe', 'ıframe'),
|
||||||
('data', '<!-- not a comment --><i>Egg & Spam</i>'),
|
('noframes', 'noframeſ'),
|
||||||
("endtag", "title")],
|
('noscript', 'noſcript'),
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=True))
|
('noscript', 'noscrıpt'),
|
||||||
self._run_check(s, [("starttag", "title", []),
|
('script', 'ſcript'),
|
||||||
('data', '<!-- not a comment --><i>Egg '),
|
('script', 'scrıpt'),
|
||||||
('entityref', 'amp'),
|
])
|
||||||
('data', ' Spam</i>'),
|
def test_invalid_nonascii_closing_tag(self, tag, endtag):
|
||||||
("endtag", "title")],
|
content = f"<br></{endtag}>"
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=False))
|
source = f"<{tag}>{content}"
|
||||||
|
self._run_check(source, [
|
||||||
@support.subTests('endtag', ['textarea', 'TEXTAREA', 'textarea ', 'textarea\n',
|
("starttag", tag, []),
|
||||||
'textarea/', 'textarea foo=bar', 'textarea foo=">"'])
|
("data", content),
|
||||||
def test_textarea_closing_tag(self, endtag):
|
], collector=EventCollector(convert_charrefs=False, scripting=True))
|
||||||
content = "<!-- not a comment --><i>Egg &amp; Spam</i>"
|
source = f"<{tag}>{content}</{tag}>"
|
||||||
s = f'<TexTarEa>{content}</{endtag}>'
|
self._run_check(source, [
|
||||||
self._run_check(s, [("starttag", "textarea", []),
|
("starttag", tag, []),
|
||||||
('data', '<!-- not a comment --><i>Egg & Spam</i>'),
|
("data", content),
|
||||||
("endtag", "textarea")],
|
("endtag", tag),
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=True))
|
], collector=EventCollector(convert_charrefs=False, scripting=True))
|
||||||
self._run_check(s, [("starttag", "textarea", []),
|
|
||||||
('data', '<!-- not a comment --><i>Egg '),
|
|
||||||
('entityref', 'amp'),
|
|
||||||
('data', ' Spam</i>'),
|
|
||||||
("endtag", "textarea")],
|
|
||||||
collector=EventCollectorNoNormalize(convert_charrefs=False))
|
|
||||||
|
|
||||||
@support.subTests('tail,end', [
|
@support.subTests('tail,end', [
|
||||||
('', False),
|
('', False),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
Add support of the "plaintext" element, RAWTEXT elements "xmp", "iframe",
|
||||||
|
"noembed" and "noframes", and optionally RAWTEXT element "noscript" in
|
||||||
|
:class:`html.parser.HTMLParser`.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue