#13960: HTMLParser is now able to handle broken comments when strict=False.

This commit is contained in:
Ezio Melotti 2012-02-10 10:45:44 +02:00
parent 5b14d732d8
commit fa3702dc28
3 changed files with 58 additions and 2 deletions

View file

@ -323,6 +323,23 @@ DOCTYPE html [
("endtag", element_lower)],
collector=Collector())
def test_comments(self):
html = ("<!-- I'm a valid comment -->"
'<!--me too!-->'
'<!------>'
'<!---->'
'<!----I have many hyphens---->'
'<!-- I have a > in the middle -->'
'<!-- and I have -- in the middle! -->')
expected = [('comment', " I'm a valid comment "),
('comment', 'me too!'),
('comment', '--'),
('comment', ''),
('comment', '--I have many hyphens--'),
('comment', ' I have a > in the middle '),
('comment', ' and I have -- in the middle! ')]
self._run_check(html, expected)
def test_condcoms(self):
html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
'<!--[if IE 8]>condcoms<![endif]-->'
@ -426,6 +443,19 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
# see #12888
self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
def test_broken_comments(self):
html = ('<! not really a comment >'
'<! not a comment either -->'
'<! -- close enough -->'
'<!!! another bogus comment !!!>')
expected = [
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
('comment', '!! another bogus comment !!!'),
]
self._run_check(html, expected)
def test_broken_condcoms(self):
# these condcoms are missing the '--' after '<!' and before the '>'
html = ('<![if !(IE)]>broken condcom<![endif]>'