mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Added several new tests to check the behavior with respect to doctype
declarations and to weird markup that we used to accept and ignore, but for which recent versions raised an exception. The original behavior has been restored and augmented: the user can decide what to do with such markup if they care, and the default is to ignore it, as early versions did.
This commit is contained in:
parent
e822049efc
commit
30c4849169
1 changed file with 77 additions and 6 deletions
|
@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser):
|
|||
def handle_pi(self, data):
|
||||
self.append(("pi", data))
|
||||
|
||||
def unknown_decl(self, decl):
|
||||
self.append(("unknown decl", decl))
|
||||
|
||||
|
||||
class CDATAEventCollector(EventCollector):
|
||||
def start_cdata(self, attrs):
|
||||
|
@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
|
||||
collector = EventCollector
|
||||
|
||||
def check_events(self, source, expected_events):
|
||||
def get_events(self, source):
|
||||
parser = self.collector()
|
||||
for s in source:
|
||||
parser.feed(s)
|
||||
parser.close()
|
||||
events = parser.get_events()
|
||||
try:
|
||||
for s in source:
|
||||
parser.feed(s)
|
||||
parser.close()
|
||||
except:
|
||||
#self.events = parser.events
|
||||
raise
|
||||
return parser.get_events()
|
||||
|
||||
def check_events(self, source, expected_events):
|
||||
try:
|
||||
events = self.get_events(source)
|
||||
except:
|
||||
import sys
|
||||
#print >>sys.stderr, pprint.pformat(self.events)
|
||||
raise
|
||||
if events != expected_events:
|
||||
self.fail("received events did not match expected events\n"
|
||||
"Expected:\n" + pprint.pformat(expected_events) +
|
||||
|
@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
self.fail("expected SGMLParseError for %r\nReceived:\n%s"
|
||||
% (source, pprint.pformat(parser.get_events())))
|
||||
|
||||
def test_doctype_decl_internal(self):
|
||||
inside = """\
|
||||
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
|
||||
SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
|
||||
<!ELEMENT html - O EMPTY>
|
||||
<!ATTLIST html
|
||||
version CDATA #IMPLIED
|
||||
profile CDATA 'DublinCore'>
|
||||
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
|
||||
<!ENTITY myEntity 'internal parsed entity'>
|
||||
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
|
||||
<!ENTITY % paramEntity 'name|name|name'>
|
||||
%paramEntity;
|
||||
<!-- comment -->
|
||||
]"""
|
||||
self.check_events(["<!%s>" % inside], [
|
||||
("decl", inside),
|
||||
])
|
||||
|
||||
def test_doctype_decl_external(self):
|
||||
inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
|
||||
self.check_events("<!%s>" % inside, [
|
||||
("decl", inside),
|
||||
])
|
||||
|
||||
def test_underscore_in_attrname(self):
|
||||
# SF bug #436621
|
||||
"""Make sure attribute names with underscores are accepted"""
|
||||
|
@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
("endtag", "b"),
|
||||
])
|
||||
|
||||
def test_bare_ampersands(self):
|
||||
self.check_events("this text & contains & ampersands &", [
|
||||
("data", "this text & contains & ampersands &"),
|
||||
])
|
||||
|
||||
def test_bare_pointy_brackets(self):
|
||||
self.check_events("this < text > contains < bare>pointy< brackets", [
|
||||
("data", "this < text > contains < bare>pointy< brackets"),
|
||||
])
|
||||
|
||||
def test_attr_syntax(self):
|
||||
output = [
|
||||
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
|
||||
|
@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
|
||||
])
|
||||
|
||||
def test_illegal_declarations(self):
|
||||
s = 'abc<!spacer type="block" height="25">def'
|
||||
self.check_events(s, [
|
||||
("data", "abc"),
|
||||
("unknown decl", 'spacer type="block" height="25"'),
|
||||
("data", "def"),
|
||||
])
|
||||
|
||||
def test_weird_starttags(self):
|
||||
self.check_events("<a<a>", [
|
||||
("starttag", "a", []),
|
||||
|
@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
("endtag", "cdata"),
|
||||
])
|
||||
|
||||
def test_illegal_declarations(self):
|
||||
s = 'abc<!spacer type="block" height="25">def'
|
||||
self.check_events(s, [
|
||||
("data", "abc"),
|
||||
("unknown decl", 'spacer type="block" height="25"'),
|
||||
("data", "def"),
|
||||
])
|
||||
|
||||
# XXX These tests have been disabled by prefixing their names with
|
||||
# an underscore. The first two exercise outstanding bugs in the
|
||||
# sgmllib module, and the third exhibits questionable behavior
|
||||
|
@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase):
|
|||
self.check_parse_error("<a foo=>")
|
||||
|
||||
|
||||
test_support.run_unittest(SGMLParserTestCase)
|
||||
def test_main():
|
||||
test_support.run_unittest(SGMLParserTestCase)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue