mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Improve handling of declarations in HTMLParser.
This commit is contained in:
parent
86f67123be
commit
f4ab491901
2 changed files with 51 additions and 29 deletions
|
@ -122,7 +122,7 @@ comment1b-->
|
|||
<Img sRc='Bar' isMAP>sample
|
||||
text
|
||||
“
|
||||
<!--comment2a-- --comment2b--><!>
|
||||
<!--comment2a-- --comment2b-->
|
||||
</Html>
|
||||
""", [
|
||||
("data", "\n"),
|
||||
|
@ -157,24 +157,6 @@ text
|
|||
("data", " foo"),
|
||||
])
|
||||
|
||||
def test_doctype_decl(self):
    # A DOCTYPE declaration with an internal subset: element, attribute
    # list, notation and entity declarations, a parameter-entity
    # reference and a comment.  The text between "<!" and the final ">"
    # is used both to build the input and as the payload of the single
    # expected "decl" event.
    declaration = "\n".join([
        "DOCTYPE html [",
        "<!ELEMENT html - O EMPTY>",
        "<!ATTLIST html",
        "version CDATA #IMPLIED",
        "profile CDATA 'DublinCore'>",
        "<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>",
        "<!ENTITY myEntity 'internal parsed entity'>",
        "<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>",
        "<!ENTITY % paramEntity 'name|name|name'>",
        "%paramEntity;",
        "<!-- comment -->",
        "]",
    ])
    markup = "<!" + declaration + ">"
    expected_events = [("decl", declaration)]
    self._run_check(markup, expected_events)
|
||||
|
||||
def test_bad_nesting(self):
|
||||
# Strangely, this *is* supposed to test that overlapping
|
||||
# elements are allowed. HTMLParser is more geared toward
|
||||
|
@ -247,6 +229,30 @@ DOCTYPE html [
|
|||
self._parse_error("<a foo='>'")
|
||||
self._parse_error("<a foo='>")
|
||||
|
||||
def test_valid_doctypes(self):
    # Doctype samples taken from
    # http://www.w3.org/QA/2002/04/valid-dtd-list.html
    # Each sample is wrapped in "<!DOCTYPE ...>" and checked against one
    # expected ('decl', 'DOCTYPE ...') event.
    doctypes = (
        'HTML',  # HTML5 doctype
        ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
         '"http://www.w3.org/TR/html4/strict.dtd"'),
        ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
         '"http://www.w3.org/TR/html4/loose.dtd"'),
        ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
         '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
        ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
         '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
        ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
         '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
        ('html PUBLIC "-//W3C//DTD '
         'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
         '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
        ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
         '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
        'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
        'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"',
    )
    for doctype in doctypes:
        markup = "<!DOCTYPE %s>" % doctype
        expected_events = [('decl', 'DOCTYPE ' + doctype)]
        self._run_check(markup, expected_events)
|
||||
|
||||
def test_declaration_junk_chars(self):
    # A DOCTYPE declaration containing a stray "$" is handed to
    # _parse_error.  NOTE(review): presumably that helper asserts the
    # parser rejects the input -- confirm against its definition.
    junk_declaration = "<!DOCTYPE foo $ >"
    self._parse_error(junk_declaration)
|
||||
|
||||
|
@ -384,8 +390,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
|||
self._run_check("<a foo='>", [('data', "<a foo='>")])
|
||||
|
||||
def test_declaration_junk_chars(self):
    # The declaration, stray "$" included, is expected to come back as a
    # single 'decl' event.  An earlier ('comment', ...) expectation for
    # this same input was flagged "XXX this is wrong" and contradicted
    # the check below, so only the 'decl' expectation is kept.
    self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])
|
||||
|
||||
def test_illegal_declarations(self):
|
||||
# XXX this might be wrong
|
||||
|
@ -510,11 +515,14 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
|||
html = ('<! not really a comment >'
|
||||
'<! not a comment either -->'
|
||||
'<! -- close enough -->'
|
||||
'<!><!<-- this was an empty comment>'
|
||||
'<!!! another bogus comment !!!>')
|
||||
expected = [
|
||||
('comment', ' not really a comment '),
|
||||
('comment', ' not a comment either --'),
|
||||
('comment', ' -- close enough --'),
|
||||
('comment', ''),
|
||||
('comment', '<-- this was an empty comment'),
|
||||
('comment', '!! another bogus comment !!!'),
|
||||
]
|
||||
self._run_check(html, expected)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue