Issue #17741: Add ElementTree.IncrementalParser, an event-driven parser for non-blocking applications.

This commit is contained in:
Antoine Pitrou 2013-04-18 19:37:06 +02:00
parent 323d2927f0
commit 5b235d0923
4 changed files with 272 additions and 109 deletions

View file

@ -903,6 +903,134 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(serialized, expected)
class IncrementalParserTest(unittest.TestCase):
def _feed(self, parser, data, chunk_size=None):
if chunk_size is None:
parser.data_received(data)
else:
for i in range(0, len(data), chunk_size):
parser.data_received(data[i:i+chunk_size])
def assert_event_tags(self, parser, expected):
events = parser.events()
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
def test_simple_xml(self):
for chunk_size in (None, 1, 5):
with self.subTest(chunk_size=chunk_size):
parser = ET.IncrementalParser()
self.assert_event_tags(parser, [])
self._feed(parser, "<!-- comment -->\n", chunk_size)
self.assert_event_tags(parser, [])
self._feed(parser,
"<root>\n <element key='value'>text</element",
chunk_size)
self.assert_event_tags(parser, [])
self._feed(parser, ">\n", chunk_size)
self.assert_event_tags(parser, [('end', 'element')])
self._feed(parser, "<element>text</element>tail\n", chunk_size)
self._feed(parser, "<empty-element/>\n", chunk_size)
self.assert_event_tags(parser, [
('end', 'element'),
('end', 'empty-element'),
])
self._feed(parser, "</root>\n", chunk_size)
self.assert_event_tags(parser, [('end', 'root')])
# Receiving EOF sets the `root` attribute
self.assertIs(parser.root, None)
parser.eof_received()
self.assertEqual(parser.root.tag, 'root')
def test_data_received_while_iterating(self):
parser = ET.IncrementalParser()
it = parser.events()
self._feed(parser, "<root>\n <element key='value'>text</element>\n")
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'element'))
self._feed(parser, "</root>\n")
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'root'))
with self.assertRaises(StopIteration):
next(it)
def test_simple_xml_with_ns(self):
parser = ET.IncrementalParser()
self.assert_event_tags(parser, [])
self._feed(parser, "<!-- comment -->\n")
self.assert_event_tags(parser, [])
self._feed(parser, "<root xmlns='namespace'>\n")
self.assert_event_tags(parser, [])
self._feed(parser, "<element key='value'>text</element")
self.assert_event_tags(parser, [])
self._feed(parser, ">\n")
self.assert_event_tags(parser, [('end', '{namespace}element')])
self._feed(parser, "<element>text</element>tail\n")
self._feed(parser, "<empty-element/>\n")
self.assert_event_tags(parser, [
('end', '{namespace}element'),
('end', '{namespace}empty-element'),
])
self._feed(parser, "</root>\n")
self.assert_event_tags(parser, [('end', '{namespace}root')])
# Receiving EOF sets the `root` attribute
self.assertIs(parser.root, None)
parser.eof_received()
self.assertEqual(parser.root.tag, '{namespace}root')
def test_events(self):
parser = ET.IncrementalParser(events=())
self._feed(parser, "<root/>\n")
self.assert_event_tags(parser, [])
parser = ET.IncrementalParser(events=('start', 'end'))
self._feed(parser, "<!-- comment -->\n")
self.assert_event_tags(parser, [])
self._feed(parser, "<root>\n")
self.assert_event_tags(parser, [('start', 'root')])
self._feed(parser, "<element key='value'>text</element")
self.assert_event_tags(parser, [('start', 'element')])
self._feed(parser, ">\n")
self.assert_event_tags(parser, [('end', 'element')])
self._feed(parser,
"<element xmlns='foo'>text<empty-element/></element>tail\n")
self.assert_event_tags(parser, [
('start', '{foo}element'),
('start', '{foo}empty-element'),
('end', '{foo}empty-element'),
('end', '{foo}element'),
])
self._feed(parser, "</root>")
parser.eof_received()
self.assertIs(parser.root, None)
self.assert_event_tags(parser, [('end', 'root')])
self.assertEqual(parser.root.tag, 'root')
parser = ET.IncrementalParser(events=('start',))
self._feed(parser, "<!-- comment -->\n")
self.assert_event_tags(parser, [])
self._feed(parser, "<root>\n")
self.assert_event_tags(parser, [('start', 'root')])
self._feed(parser, "<element key='value'>text</element")
self.assert_event_tags(parser, [('start', 'element')])
self._feed(parser, ">\n")
self.assert_event_tags(parser, [])
self._feed(parser,
"<element xmlns='foo'>text<empty-element/></element>tail\n")
self.assert_event_tags(parser, [
('start', '{foo}element'),
('start', '{foo}empty-element'),
])
self._feed(parser, "</root>")
parser.eof_received()
self.assertEqual(parser.root.tag, 'root')
def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.IncrementalParser(events=('start', 'end', 'bogus'))
#
# xinclude tests (samples from appendix C of the xinclude specification)
@ -1406,6 +1534,7 @@ class BugsTest(unittest.TestCase):
ET.register_namespace('test10777', 'http://myuri/')
ET.register_namespace('test10777', 'http://myuri/')
# --------------------------------------------------------------------
@ -2301,6 +2430,7 @@ def test_main(module=None):
ElementSlicingTest,
BasicElementTest,
ElementTreeTest,
IncrementalParserTest,
IOTest,
ParseErrorTest,
XIncludeTest,