Deal more appropriately with bare ampersands and pointy brackets; this
module has to deal with "classic" HTML-as-deployed as well as XHTML, so we
cannot be as strict as XHTML allows.

This closes SF bug #453059, but uses a different fix than suggested in
the bug comments.
This commit is contained in:
Fred Drake 2001-08-20 21:24:19 +00:00
parent 18da1e1e7f
commit 029acfb922
2 changed files with 39 additions and 19 deletions

View file

@ -1,6 +1,7 @@
"""Tests for HTMLParser.py."""
import HTMLParser
import pprint
import sys
import test_support
import unittest
@ -83,9 +84,10 @@ class TestCaseBase(unittest.TestCase):
for c in self.epilogue:
parser.feed(c)
parser.close()
self.assert_(parser.get_events() ==
self.initial_events + events + self.final_events,
parser.get_events())
events = parser.get_events()
self.assertEqual(events,
self.initial_events + events + self.final_events,
"got events:\n" + pprint.pformat(events))
def _run_check_extra(self, source, events):
    """Run _run_check() on source/events with EventCollectorExtra."""
    # The collector class is handed over as the third positional argument.
    collector_class = EventCollectorExtra
    self._run_check(source, events, collector_class)
@ -137,6 +139,18 @@ text
("data", "\n"),
])
def test_doctype_decl(self):
    # A DOCTYPE declaration carrying a bracketed internal subset is
    # expected to be reported as one "decl" event whose payload is
    # everything between the opening "<!" and the closing ">".
    decl_body = """\
DOCTYPE html [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED '4.0'>
<!-- comment -->
]"""
    markup = "<!" + decl_body + ">"
    expected = [("decl", decl_body)]
    self._run_check(markup, expected)
def test_bad_nesting(self):
# Strangely, this *is* supposed to test that overlapping
# elements are allowed. HTMLParser is more geared toward
@ -148,6 +162,16 @@ text
("endtag", "b"),
])
def test_bare_ampersands(self):
    # Text containing "&" characters that are not followed by a name is
    # expected to come back as a single, unmodified data event.
    text = "this text & contains & ampersands &"
    expected = [("data", text)]
    self._run_check(text, expected)
def test_bare_pointy_brackets(self):
    # "<" and ">" characters that do not form a tag are expected to come
    # back as a single, unmodified data event.
    text = "this < text > contains < bare>pointy< brackets"
    expected = [("data", text)]
    self._run_check(text, expected)
def test_attr_syntax(self):
output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
@ -199,16 +223,12 @@ text
self._run_check(["<a b='>'", ">"], output)
def test_starttag_junk_chars(self):
self._parse_error("<")
self._parse_error("<>")
self._parse_error("</>")
self._parse_error("</$>")
self._parse_error("</")
self._parse_error("</a")
self._parse_error("<a<a>")
self._parse_error("</a<a>")
self._parse_error("<$")
self._parse_error("<$>")
self._parse_error("<!")
self._parse_error("<a $>")
self._parse_error("<a")