mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
Issue #25638: Optimized ElementTree.iterparse(); it is now 2x faster.
ElementTree.XMLParser._setevents now accepts any objects with the append method, not just a list.
This commit is contained in:
parent
f2fdfe1f88
commit
9ec5e25f26
4 changed files with 56 additions and 80 deletions
|
@ -95,6 +95,7 @@ import sys
|
|||
import re
|
||||
import warnings
|
||||
import io
|
||||
import collections
|
||||
import contextlib
|
||||
|
||||
from . import ElementPath
|
||||
|
@ -1198,16 +1199,37 @@ def iterparse(source, events=None, parser=None):
|
|||
Returns an iterator providing (event, elem) pairs.
|
||||
|
||||
"""
|
||||
# Use the internal, undocumented _parser argument for now; When the
|
||||
# parser argument of iterparse is removed, this can be killed.
|
||||
pullparser = XMLPullParser(events=events, _parser=parser)
|
||||
def iterator():
|
||||
try:
|
||||
while True:
|
||||
yield from pullparser.read_events()
|
||||
# load event buffer
|
||||
data = source.read(16 * 1024)
|
||||
if not data:
|
||||
break
|
||||
pullparser.feed(data)
|
||||
root = pullparser._close_and_return_root()
|
||||
yield from pullparser.read_events()
|
||||
it.root = root
|
||||
finally:
|
||||
if close_source:
|
||||
source.close()
|
||||
|
||||
class IterParseIterator(collections.Iterator):
|
||||
__next__ = iterator().__next__
|
||||
it = IterParseIterator()
|
||||
it.root = None
|
||||
del iterator, IterParseIterator
|
||||
|
||||
close_source = False
|
||||
if not hasattr(source, "read"):
|
||||
source = open(source, "rb")
|
||||
close_source = True
|
||||
try:
|
||||
return _IterParseIterator(source, events, parser, close_source)
|
||||
except:
|
||||
if close_source:
|
||||
source.close()
|
||||
raise
|
||||
|
||||
return it
|
||||
|
||||
|
||||
class XMLPullParser:
|
||||
|
@ -1217,9 +1239,7 @@ class XMLPullParser:
|
|||
# upon in user code. It will be removed in a future release.
|
||||
# See http://bugs.python.org/issue17741 for more details.
|
||||
|
||||
# _elementtree.c expects a list, not a deque
|
||||
self._events_queue = []
|
||||
self._index = 0
|
||||
self._events_queue = collections.deque()
|
||||
self._parser = _parser or XMLParser(target=TreeBuilder())
|
||||
# wire up the parser for event reporting
|
||||
if events is None:
|
||||
|
@ -1257,64 +1277,14 @@ class XMLPullParser:
|
|||
retrieved from the iterator.
|
||||
"""
|
||||
events = self._events_queue
|
||||
while True:
|
||||
index = self._index
|
||||
try:
|
||||
event = events[self._index]
|
||||
# Avoid retaining references to past events
|
||||
events[self._index] = None
|
||||
except IndexError:
|
||||
break
|
||||
index += 1
|
||||
# Compact the list in a O(1) amortized fashion
|
||||
# As noted above, _elementree.c needs a list, not a deque
|
||||
if index * 2 >= len(events):
|
||||
events[:index] = []
|
||||
self._index = 0
|
||||
else:
|
||||
self._index = index
|
||||
while events:
|
||||
event = events.popleft()
|
||||
if isinstance(event, Exception):
|
||||
raise event
|
||||
else:
|
||||
yield event
|
||||
|
||||
|
||||
class _IterParseIterator:
|
||||
|
||||
def __init__(self, source, events, parser, close_source=False):
|
||||
# Use the internal, undocumented _parser argument for now; When the
|
||||
# parser argument of iterparse is removed, this can be killed.
|
||||
self._parser = XMLPullParser(events=events, _parser=parser)
|
||||
self._file = source
|
||||
self._close_file = close_source
|
||||
self.root = self._root = None
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
while 1:
|
||||
for event in self._parser.read_events():
|
||||
return event
|
||||
if self._parser._parser is None:
|
||||
break
|
||||
# load event buffer
|
||||
data = self._file.read(16 * 1024)
|
||||
if data:
|
||||
self._parser.feed(data)
|
||||
else:
|
||||
self._root = self._parser._close_and_return_root()
|
||||
self.root = self._root
|
||||
except:
|
||||
if self._close_file:
|
||||
self._file.close()
|
||||
raise
|
||||
if self._close_file:
|
||||
self._file.close()
|
||||
raise StopIteration
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
|
||||
def XML(text, parser=None):
|
||||
"""Parse XML document from string constant.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue