mirror of
https://github.com/python/cpython.git
synced 2025-07-19 09:15:34 +00:00

svn+ssh://pythondev@svn.python.org/python/trunk ........ r73004 | jeffrey.yasskin | 2009-05-28 22:44:31 -0500 (Thu, 28 May 2009) | 5 lines Fix nearly all compilation warnings under Apple gcc-4.0. Tested with OPT="-g -Wall -Wstrict-prototypes -Werror" in both --with-pydebug mode and --without. There's still a batch of non-prototype warnings in Xlib.h that I don't know how to fix. ........ r73439 | benjamin.peterson | 2009-06-15 19:29:31 -0500 (Mon, 15 Jun 2009) | 1 line don't mask encoding errors when decoding a string #6289 ........ r73496 | vinay.sajip | 2009-06-21 12:37:27 -0500 (Sun, 21 Jun 2009) | 1 line Issue #6314: logging.basicConfig() performs extra checks on the "level" argument. ........ r73509 | amaury.forgeotdarc | 2009-06-22 14:33:48 -0500 (Mon, 22 Jun 2009) | 2 lines #4490 Fix sample code run by "python -m xml.sax.xmlreader" ........ r73529 | r.david.murray | 2009-06-23 13:02:46 -0500 (Tue, 23 Jun 2009) | 4 lines Fix issue 5230 by having pydoc's safeimport check to see if the import error was thrown from itself in order to decide if the module can't be found. Thanks to Lucas Prado Melo for collaborating on the fix and tests. ........ r73564 | amaury.forgeotdarc | 2009-06-25 17:29:29 -0500 (Thu, 25 Jun 2009) | 6 lines #2016 Fix a crash in function call when the **kwargs dictionary is mutated during the function call setup. This even gives a slight speedup, probably because tuple allocation is faster than PyMem_NEW. ........ r73576 | benjamin.peterson | 2009-06-26 18:37:06 -0500 (Fri, 26 Jun 2009) | 1 line document is_declared_global() ........ r73577 | benjamin.peterson | 2009-06-27 09:16:23 -0500 (Sat, 27 Jun 2009) | 1 line link to extensive generator docs in the reference manual ........ r73595 | ezio.melotti | 2009-06-27 18:45:39 -0500 (Sat, 27 Jun 2009) | 1 line stmt and setup can contain multiple statements, see #5896 ........ r73596 | ezio.melotti | 2009-06-27 19:07:45 -0500 (Sat, 27 Jun 2009) | 1 line Fixed a wrong apostrophe ........ 
r73605 | georg.brandl | 2009-06-28 07:10:18 -0500 (Sun, 28 Jun 2009) | 1 line Remove stray pychecker directive. ........
414 lines
14 KiB
Python
414 lines
14 KiB
Python
"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
|
|
|
|
version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# is checked explicitly before the import below is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Report a missing or unusable expat as "reader not available" rather than
# letting a plain ImportError escape from this module.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        """Fallback used without weakref support: return *o* itself."""
        return o
else:
    import weakref
    _mkproxy = weakref.proxy
    # Only _mkproxy is meant to stay in the module namespace.
    del weakref, _weakref
|
|
|
|
# --- ExpatLocator
|
|
|
|
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.

    If the referenced parser object no longer exists, every accessor
    answers with the same value it gives when no expat parser is active
    (``None`` for the column and the identifiers, ``1`` for the line)
    instead of letting ``ReferenceError`` escape.
    """

    def __init__(self, parser):
        """Remember *parser* through ``_mkproxy``."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        try:
            expat_parser = self._ref._parser
        except ReferenceError:
            # The proxied reader has been garbage collected.
            return None
        if expat_parser is None:
            return None
        return expat_parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        try:
            expat_parser = self._ref._parser
        except ReferenceError:
            # The proxied reader has been garbage collected.
            return 1
        if expat_parser is None:
            return 1
        return expat_parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the reader's current input source."""
        parser = self._ref
        if parser is None:
            return None
        try:
            source = parser._source
        except ReferenceError:
            # A weak proxy is never ``None``; a dead referent shows up
            # as ReferenceError on attribute access instead.
            return None
        return source.getPublicId()

    def getSystemId(self):
        """Return the system id of the reader's current input source."""
        parser = self._ref
        if parser is None:
            return None
        try:
            source = parser._source
        except ReferenceError:
            # See getPublicId(): dead referent, nothing to report.
            return None
        return source.getSystemId()
|
|
|
|
|
|
# --- ExpatParser
|
|
|
|
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The reader creates an expat parser object in reset(), installs its own
    methods (or the content/lexical handler's methods) as expat callbacks
    and forwards the resulting events to the registered SAX handlers.  It
    also acts as its own Locator, reporting expat's current error position.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed on to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None          # expat parser; created by reset()
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []      # (parser, source) of enclosing entities
        self._external_ges = 1
        self._interning = None       # dict handed to expat, or None

    # XMLReader methods

    def parse(self, source):
        """Parse an XML document from a URL or an InputSource."""
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Set expat's base to the system id of *source*, if it has one."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register *handler*; rebind expat's callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            # These features can never be switched on (see setFeature).
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for an unknown feature.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an already installed interning dictionary.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        The XML string property is only available while an expat parser
        exists (SAXNotSupportedException otherwise) and only if that parser
        offers GetInputContext (SAXNotRecognizedException otherwise).
        Unknown properties raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        The XML string property is read-only (SAXNotSupportedException);
        unknown properties raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                # Rebind the expat callbacks of the running parser.
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Hand *data* to expat, starting a new document if necessary.

        expat errors are wrapped in a SAXParseException and passed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document and release the expat parser.

        Does nothing while an external entity is being processed, and
        nothing when there is no expat parser (reader never started or
        already closed), so that close() is safe to call repeatedly.
        """
        if self._entity_stack or self._parser is None:
            # If we are completing an external entity, do nothing here.
            # Without a parser there is no document to finish either;
            # feeding would silently start a new, empty one.
            return
        self.feed("", isFinal=1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        """Point expat's PI and character data callbacks at the handler."""
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install or clear the expat callbacks used for lexical events."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Goes through our own method, which reorders the arguments.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and wire up all callbacks."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespace-qualified name; the *_ns handlers split on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
        # DefaultHandler, DefaultHandlerExpand and NotStandaloneHandler
        # are not installed.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """Forward a start tag (namespaces off) to the content handler."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """Forward an end tag (namespaces off) to the content handler."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Forward a start tag (namespaces on) to the content handler.

        Names arrive as one, two or three whitespace-separated parts; the
        first two parts of a multi-part name become the (uri, localname)
        pair, and a third part is used as the prefix of attribute qnames.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Forward an end tag (namespaces on) to the content handler."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        """Forward character data to the content handler."""
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward the start of a prefix mapping to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a prefix mapping to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Forward a DOCTYPE start to the lexical handler.

        The handler is called as startDTD(name, pubid, sysid), i.e. with
        the two ids swapped relative to this callback's argument order.
        """
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity referenced by the document.

        Returns 1 when the entity was skipped (external general entities
        disabled) or parsed, and 0 when parsing it raised an exception.
        The reader's parser and input source are switched to the entity
        for the duration of the nested parse and always switched back.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Create the sub-parser before touching any state, so a failure
        # here cannot leave a stale entry on the entity stack.
        entity_parser = self._parser.ExternalEntityParserCreate(context)
        self._entity_stack.append((self._parser, self._source))
        self._parser = entity_parser
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            return 0  # FIXME: save error info here?
        finally:
            # Restore the enclosing parser/source on failure as well;
            # otherwise close() would stay a no-op forever and error
            # positions would be read from the abandoned entity parser.
            (self._parser, self._source) = self._entity_stack.pop()
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Report a skipped entity to the content handler."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
|
|
|
|
# ---
|
|
|
|
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    return ExpatParser(*args, **kwargs)
|
|
|
|
# ---
|
|
|
|
if __name__ == "__main__":
    # Demo: parse a document from the network and echo it through
    # XMLGenerator.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")
|