mirror of
https://github.com/python/cpython.git
synced 2025-08-22 17:55:18 +00:00
#15114: The html.parser module now raises a DeprecationWarning when the strict argument of HTMLParser or the HTMLParser.error method is used.
This commit is contained in:
parent
28f0beaff6
commit
88ebfb129b
4 changed files with 29 additions and 9 deletions
|
@ -74,7 +74,7 @@ as they are encountered::
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
print("Encountered some data :", data)
|
print("Encountered some data :", data)
|
||||||
|
|
||||||
parser = MyHTMLParser(strict=False)
|
parser = MyHTMLParser()
|
||||||
parser.feed('<html><head><title>Test</title></head>'
|
parser.feed('<html><head><title>Test</title></head>'
|
||||||
'<body><h1>Parse me!</h1></body></html>')
|
'<body><h1>Parse me!</h1></body></html>')
|
||||||
|
|
||||||
|
@ -272,7 +272,7 @@ examples::
|
||||||
def handle_decl(self, data):
|
def handle_decl(self, data):
|
||||||
print("Decl :", data)
|
print("Decl :", data)
|
||||||
|
|
||||||
parser = MyHTMLParser(strict=False)
|
parser = MyHTMLParser()
|
||||||
|
|
||||||
Parsing a doctype::
|
Parsing a doctype::
|
||||||
|
|
||||||
|
|
|
@ -94,6 +94,8 @@ class HTMLParseError(Exception):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
_strict_sentinel = object()
|
||||||
|
|
||||||
class HTMLParser(_markupbase.ParserBase):
|
class HTMLParser(_markupbase.ParserBase):
|
||||||
"""Find tags and other markup and call handler functions.
|
"""Find tags and other markup and call handler functions.
|
||||||
|
|
||||||
|
@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
|
|
||||||
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
||||||
|
|
||||||
def __init__(self, strict=False):
|
def __init__(self, strict=_strict_sentinel):
|
||||||
"""Initialize and reset this instance.
|
"""Initialize and reset this instance.
|
||||||
|
|
||||||
If strict is set to False (the default) the parser will parse invalid
|
If strict is set to False (the default) the parser will parse invalid
|
||||||
markup, otherwise it will raise an error. Note that the strict mode
|
markup, otherwise it will raise an error. Note that the strict mode
|
||||||
is deprecated.
|
and argument are deprecated.
|
||||||
"""
|
"""
|
||||||
if strict:
|
if strict is not _strict_sentinel:
|
||||||
warnings.warn("The strict mode is deprecated.",
|
warnings.warn("The strict argument and mode are deprecated.",
|
||||||
DeprecationWarning, stacklevel=2)
|
DeprecationWarning, stacklevel=2)
|
||||||
|
else:
|
||||||
|
strict = False # default
|
||||||
self.strict = strict
|
self.strict = strict
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
|
@ -151,6 +155,8 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
self.goahead(1)
|
self.goahead(1)
|
||||||
|
|
||||||
def error(self, message):
|
def error(self, message):
|
||||||
|
warnings.warn("The 'error' method is deprecated.",
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
raise HTMLParseError(message, self.getpos())
|
raise HTMLParseError(message, self.getpos())
|
||||||
|
|
||||||
__starttag_text = None
|
__starttag_text = None
|
||||||
|
|
|
@ -96,7 +96,9 @@ class TestCaseBase(unittest.TestCase):
|
||||||
parser = self.get_collector()
|
parser = self.get_collector()
|
||||||
parser.feed(source)
|
parser.feed(source)
|
||||||
parser.close()
|
parser.close()
|
||||||
self.assertRaises(html.parser.HTMLParseError, parse)
|
with self.assertRaises(html.parser.HTMLParseError):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
parse()
|
||||||
|
|
||||||
|
|
||||||
class HTMLParserStrictTestCase(TestCaseBase):
|
class HTMLParserStrictTestCase(TestCaseBase):
|
||||||
|
@ -360,7 +362,16 @@ text
|
||||||
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
||||||
|
|
||||||
def get_collector(self):
|
def get_collector(self):
|
||||||
return EventCollector(strict=False)
|
return EventCollector()
|
||||||
|
|
||||||
|
def test_deprecation_warnings(self):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
EventCollector(strict=True)
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
EventCollector(strict=False)
|
||||||
|
with self.assertRaises(html.parser.HTMLParseError):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
EventCollector().error('test')
|
||||||
|
|
||||||
def test_tolerant_parsing(self):
|
def test_tolerant_parsing(self):
|
||||||
self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
|
self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
|
||||||
|
@ -676,7 +687,7 @@ class AttributesStrictTestCase(TestCaseBase):
|
||||||
class AttributesTolerantTestCase(AttributesStrictTestCase):
|
class AttributesTolerantTestCase(AttributesStrictTestCase):
|
||||||
|
|
||||||
def get_collector(self):
|
def get_collector(self):
|
||||||
return EventCollector(strict=False)
|
return EventCollector()
|
||||||
|
|
||||||
def test_attr_funky_names2(self):
|
def test_attr_funky_names2(self):
|
||||||
self._run_check(
|
self._run_check(
|
||||||
|
|
|
@ -31,6 +31,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #15114: The html.parser module now raises a DeprecationWarning when the
|
||||||
|
strict argument of HTMLParser or the HTMLParser.error method is used.
|
||||||
|
|
||||||
- Issue #19410: Undo the special-casing removal of '' for
|
- Issue #19410: Undo the special-casing removal of '' for
|
||||||
importlib.machinery.FileFinder.
|
importlib.machinery.FileFinder.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue