mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Adjust PullDOM to use a DOMImplementation instance to create new Document
objects; uses minidom if one is not provided to the constructor. parse(): Pick up the default_bufsize default value dynamically so that the value in the module may be (meaningfully) changed at runtime. This (partially) closes patch #102477.
This commit is contained in:
parent
adf5410dc4
commit
c16adce273
1 changed files with 37 additions and 26 deletions
|
@ -1,5 +1,5 @@
|
|||
import minidom
|
||||
import xml.sax,xml.sax.handler
|
||||
import xml.sax
|
||||
import xml.sax.handler
|
||||
|
||||
START_ELEMENT = "START_ELEMENT"
|
||||
END_ELEMENT = "END_ELEMENT"
|
||||
|
@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
|
|||
CHARACTERS = "CHARACTERS"
|
||||
|
||||
class PullDOM(xml.sax.ContentHandler):
|
||||
def __init__(self):
|
||||
_locator = None
|
||||
document = None
|
||||
|
||||
def __init__(self, documentFactory=None):
|
||||
self.documentFactory = documentFactory
|
||||
self.firstEvent = [None, None]
|
||||
self.lastEvent = self.firstEvent
|
||||
self._ns_contexts = [{}] # contains uri -> prefix dicts
|
||||
self._current_context = self._ns_contexts[-1]
|
||||
|
||||
def setDocumentLocator(self, locator): pass
|
||||
def setDocumentLocator(self, locator):
|
||||
self._locator = locator
|
||||
|
||||
def startPrefixMapping(self, prefix, uri):
|
||||
self._ns_contexts.append(self._current_context.copy())
|
||||
self._current_context[uri] = prefix
|
||||
self._current_context[uri] = prefix or ''
|
||||
|
||||
def endPrefixMapping(self, prefix):
|
||||
del self._ns_contexts[-1]
|
||||
self._current_context = self._ns_contexts.pop()
|
||||
|
||||
def startElementNS(self, name, tagName , attrs):
|
||||
uri,localname = name
|
||||
uri, localname = name
|
||||
if uri:
|
||||
# When using namespaces, the reader may or may not
|
||||
# provide us with the original name. If not, create
|
||||
|
@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler):
|
|||
attr.value = value
|
||||
node.setAttributeNode(attr)
|
||||
|
||||
parent = self.curNode
|
||||
node.parentNode = parent
|
||||
node.parentNode = self.curNode
|
||||
self.curNode = node
|
||||
|
||||
self.lastEvent[1] = [(START_ELEMENT, node), None]
|
||||
|
@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler):
|
|||
self.lastEvent[1] = [(END_ELEMENT, node), None]
|
||||
self.lastEvent = self.lastEvent[1]
|
||||
#self.events.append((END_ELEMENT, node))
|
||||
self.curNode = node.parentNode
|
||||
self.curNode = self.curNode.parentNode
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
node = self.document.createElement(name)
|
||||
|
@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler):
|
|||
attr.value = value
|
||||
node.setAttributeNode(attr)
|
||||
|
||||
parent = self.curNode
|
||||
node.parentNode = parent
|
||||
node.parentNode = self.curNode
|
||||
self.curNode = node
|
||||
|
||||
self.lastEvent[1] = [(START_ELEMENT, node), None]
|
||||
|
@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler):
|
|||
#self.events.append((PROCESSING_INSTRUCTION, node))
|
||||
|
||||
def ignorableWhitespace(self, chars):
|
||||
node = self.document.createTextNode(chars[start:start + length])
|
||||
node = self.document.createTextNode(chars)
|
||||
parent = self.curNode
|
||||
node.parentNode = parent
|
||||
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
|
||||
|
@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler):
|
|||
self.lastEvent = self.lastEvent[1]
|
||||
|
||||
def startDocument(self):
|
||||
node = self.curNode = self.document = minidom.Document()
|
||||
node.parentNode = None
|
||||
publicId = systemId = None
|
||||
if self._locator:
|
||||
publicId = self._locator.getPublicId()
|
||||
systemId = self._locator.getSystemId()
|
||||
if self.documentFactory is None:
|
||||
import xml.dom.minidom
|
||||
self.documentFactory = xml.dom.minidom.Document.implementation
|
||||
node = self.documentFactory.createDocument(None, publicId, systemId)
|
||||
self.curNode = self.document = node
|
||||
self.lastEvent[1] = [(START_DOCUMENT, node), None]
|
||||
self.lastEvent = self.lastEvent[1]
|
||||
#self.events.append((START_DOCUMENT, node))
|
||||
|
||||
def endDocument(self):
|
||||
assert not self.curNode.parentNode
|
||||
for node in self.curNode.childNodes:
|
||||
if node.nodeType == node.ELEMENT_NODE:
|
||||
self.document.documentElement = node
|
||||
#if not self.document.documentElement:
|
||||
# raise Error, "No document element"
|
||||
|
||||
assert self.curNode.parentNode is None, \
|
||||
"not all elements have been properly closed"
|
||||
assert self.curNode.documentElement is not None, \
|
||||
"document does not contain a root element"
|
||||
node = self.curNode.documentElement
|
||||
self.lastEvent[1] = [(END_DOCUMENT, node), None]
|
||||
#self.events.append((END_DOCUMENT, self.curNode))
|
||||
|
||||
|
@ -156,7 +164,7 @@ class DOMEventStream:
|
|||
def reset(self):
|
||||
self.pulldom = PullDOM()
|
||||
# This content handler relies on namespace support
|
||||
self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
|
||||
self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
|
||||
self.parser.setContentHandler(self.pulldom)
|
||||
|
||||
def __getitem__(self, pos):
|
||||
|
@ -179,7 +187,7 @@ class DOMEventStream:
|
|||
if not self.pulldom.firstEvent[1]:
|
||||
self.pulldom.lastEvent = self.pulldom.firstEvent
|
||||
while not self.pulldom.firstEvent[1]:
|
||||
buf=self.stream.read(self.bufsize)
|
||||
buf = self.stream.read(self.bufsize)
|
||||
if not buf:
|
||||
#FIXME: why doesn't Expat close work?
|
||||
#self.parser.close()
|
||||
|
@ -214,10 +222,13 @@ class SAX2DOM(PullDOM):
|
|||
node = self.lastEvent[0][1]
|
||||
node.parentNode.appendChild(node)
|
||||
|
||||
|
||||
default_bufsize = (2 ** 14) - 20
|
||||
|
||||
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
|
||||
if type(stream_or_string) is type(""):
|
||||
def parse(stream_or_string, parser=None, bufsize=None):
|
||||
if bufsize is None:
|
||||
bufsize = default_bufsize
|
||||
if type(stream_or_string) in [type(""), type(u"")]:
|
||||
stream = open(stream_or_string)
|
||||
else:
|
||||
stream = stream_or_string
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue