mirror of
https://github.com/python/cpython.git
synced 2025-08-21 17:25:34 +00:00
#13960: HTMLParser is now able to handle broken comments.
This commit is contained in:
parent
32b6371460
commit
4b92cc3f79
3 changed files with 74 additions and 22 deletions
|
@@ -114,7 +114,7 @@ comment1b-->
|
|||
<Img sRc='Bar' isMAP>sample
|
||||
text
|
||||
&#x201C;
|
||||
<!--comment2a-- --comment2b--><!>
|
||||
<!--comment2a-- --comment2b-->
|
||||
</Html>
|
||||
""", [
|
||||
("data", "\n"),
|
||||
|
@@ -142,24 +142,6 @@ text
|
|||
("data", " foo"),
|
||||
])
|
||||
|
||||
def test_doctype_decl(self):
|
||||
inside = """\
|
||||
DOCTYPE html [
|
||||
<!ELEMENT html - O EMPTY>
|
||||
<!ATTLIST html
|
||||
version CDATA #IMPLIED
|
||||
profile CDATA 'DublinCore'>
|
||||
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
|
||||
<!ENTITY myEntity 'internal parsed entity'>
|
||||
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
|
||||
<!ENTITY % paramEntity 'name|name|name'>
|
||||
%paramEntity;
|
||||
<!-- comment -->
|
||||
]"""
|
||||
self._run_check("<!%s>" % inside, [
|
||||
("decl", inside),
|
||||
])
|
||||
|
||||
def test_bad_nesting(self):
|
||||
# Strangely, this *is* supposed to test that overlapping
|
||||
# elements are allowed. HTMLParser is more geared toward
|
||||
|
@@ -182,7 +164,8 @@ DOCTYPE html [
|
|||
])
|
||||
|
||||
def test_illegal_declarations(self):
|
||||
self._parse_error('<!spacer type="block" height="25">')
|
||||
self._run_check('<!spacer type="block" height="25">',
|
||||
[('comment', 'spacer type="block" height="25"')])
|
||||
|
||||
def test_starttag_end_boundary(self):
|
||||
self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
|
||||
|
@@ -233,7 +216,7 @@ DOCTYPE html [
|
|||
self._parse_error("<a foo='>")
|
||||
|
||||
def test_declaration_junk_chars(self):
|
||||
self._parse_error("<!DOCTYPE foo $ >")
|
||||
self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])
|
||||
|
||||
def test_startendtag(self):
|
||||
self._run_check("<p/>", [
|
||||
|
@@ -449,6 +432,39 @@ class AttributesTestCase(TestCaseBase):
|
|||
[("href", "http://www.example.org/\">;")]),
|
||||
("data", "spam"), ("endtag", "a")])
|
||||
|
||||
def test_comments(self):
|
||||
html = ("<!-- I'm a valid comment -->"
|
||||
'<!--me too!-->'
|
||||
'<!------>'
|
||||
'<!---->'
|
||||
'<!----I have many hyphens---->'
|
||||
'<!-- I have a > in the middle -->'
|
||||
'<!-- and I have -- in the middle! -->')
|
||||
expected = [('comment', " I'm a valid comment "),
|
||||
('comment', 'me too!'),
|
||||
('comment', '--'),
|
||||
('comment', ''),
|
||||
('comment', '--I have many hyphens--'),
|
||||
('comment', ' I have a > in the middle '),
|
||||
('comment', ' and I have -- in the middle! ')]
|
||||
self._run_check(html, expected)
|
||||
|
||||
def test_broken_comments(self):
|
||||
html = ('<! not really a comment >'
|
||||
'<! not a comment either -->'
|
||||
'<! -- close enough -->'
|
||||
'<!><!<-- this was an empty comment>'
|
||||
'<!!! another bogus comment !!!>')
|
||||
expected = [
|
||||
('comment', ' not really a comment '),
|
||||
('comment', ' not a comment either --'),
|
||||
('comment', ' -- close enough --'),
|
||||
('comment', ''),
|
||||
('comment', '<-- this was an empty comment'),
|
||||
('comment', '!! another bogus comment !!!'),
|
||||
]
|
||||
self._run_check(html, expected)
|
||||
|
||||
def test_condcoms(self):
|
||||
html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
|
||||
'<!--[if IE 8]>condcoms<![endif]-->'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue