mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 11:49:12 +00:00 
			
		
		
		
	There's one major and one minor category still unfixed: doctests are the major category (and I hope to be able to augment the refactoring tool to refactor bona fide doctests soon); other code generating print statements in strings is the minor category. (Oh, and I don't know if the compiler package works.)
		
			
				
	
	
		
			342 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			342 lines
		
	
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
This module contains the core classes of version 2.0 of SAX for Python.
 | 
						|
This file provides only default classes with absolutely minimum
 | 
						|
functionality, from which drivers and applications can be subclassed.
 | 
						|
 | 
						|
Many of these classes are empty and are included only as documentation
 | 
						|
of the interfaces.
 | 
						|
 | 
						|
$Id$
 | 
						|
"""
 | 
						|
 | 
						|
# Version identifier exported by this module.
version = '2.0beta'
 | 
						|
 | 
						|
#============================================================================
 | 
						|
#
 | 
						|
# HANDLER INTERFACES
 | 
						|
#
 | 
						|
#============================================================================
 | 
						|
 | 
						|
# ===== ERRORHANDLER =====
 | 
						|
 | 
						|
class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface and
    register it with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors. There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors. All methods take a
    SAXParseException as the only parameter."""

    def error(self, exception):
        """Handle a recoverable error.

        The default implementation raises the exception it is given."""
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error.

        The default implementation raises the exception it is given."""
        raise exception

    def warning(self, exception):
        """Handle a warning.

        The default implementation prints the exception and returns
        normally."""
        print(exception)
 | 
						|
 | 
						|
 | 
						|
# ===== CONTENTHANDLER =====
 | 
						|
 | 
						|
class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX, and the one most
    important to applications. The order of events in this interface
    mirrors the order of the information in the document.

    Every callback except setDocumentLocator has an empty default
    implementation; subclasses override the events they care about."""

    def __init__(self):
        # No locator is known until the parser supplies one through
        # setDocumentLocator.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Called by the parser to give the application a locator for
        locating the origin of document events.

        SAX parsers are strongly encouraged (though not absolutely
        required) to supply a locator: if it does so, it must supply
        the locator to the application by invoking this method before
        invoking any of the other methods in this interface.

        The locator allows the application to determine the end
        position of any document-related event, even if the parser is
        not reporting an error. Typically, the application will use
        this information for reporting its own errors (such as
        character content that does not match an application's
        business rules). The information returned by the locator is
        probably not sufficient for use with a search engine.

        Note that the locator will return correct information only
        during the invocation of the events in this interface. The
        application should not attempt to use it at any other time.

        The default implementation stores the locator on the handler
        as self._locator."""
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        The SAX parser will invoke this method only once, before any
        other methods in this interface or in DTDHandler (except for
        setDocumentLocator)."""

    def endDocument(self):
        """Receive notification of the end of a document.

        The SAX parser will invoke this method only once, and it will
        be the last method invoked during the parse. The parser shall
        not invoke this method until it has either abandoned parsing
        (because of an unrecoverable error) or reached the end of
        input."""

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        The information from this event is not necessary for normal
        Namespace processing: the SAX XML reader will automatically
        replace prefixes for element and attribute names when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        There are cases, however, when applications need to use
        prefixes in character data or in attribute values, where they
        cannot safely be expanded automatically; the
        start/endPrefixMapping event supplies the information to the
        application to expand prefixes in those contexts itself, if
        necessary.

        Note that start/endPrefixMapping events are not guaranteed to
        be properly nested relative to each other: all
        startPrefixMapping events will occur before the corresponding
        startElement event, and all endPrefixMapping events will occur
        after the corresponding endElement event, but their order is
        not guaranteed."""

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details. This event will always
        occur after the corresponding endElement event, but the order
        of endPrefixMapping events is not otherwise guaranteed."""

    def startElement(self, name, attrs):
        """Signals the start of an element in non-namespace mode.

        The name parameter contains the raw XML 1.0 name of the
        element type as a string and the attrs parameter holds an
        instance of the Attributes class containing the attributes of
        the element."""

    def endElement(self, name):
        """Signals the end of an element in non-namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElement event."""

    def startElementNS(self, name, qname, attrs):
        """Signals the start of an element in namespace mode.

        The name parameter contains the name of the element type as a
        (uri, localname) tuple, the qname parameter the raw XML 1.0
        name used in the source document, and the attrs parameter
        holds an instance of the Attributes class containing the
        attributes of the element.

        The uri part of the name tuple is None for elements which have
        no namespace."""

    def endElementNS(self, name, qname):
        """Signals the end of an element in namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElementNS event."""

    def characters(self, content):
        """Receive notification of character data.

        The Parser will call this method to report each chunk of
        character data. SAX parsers may return all contiguous
        character data in a single chunk, or they may split it into
        several chunks; however, all of the characters in any single
        event must come from the same external entity so that the
        Locator provides useful information."""

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating Parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10): non-validating parsers may also use this method
        if they are capable of parsing and using content models.

        SAX parsers may return all contiguous whitespace in a single
        chunk, or they may split it into several chunks; however, all
        of the characters in any single event must come from the same
        external entity, so that the Locator provides useful
        information."""

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The Parser will invoke this method once for each processing
        instruction found: note that processing instructions may occur
        before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        using this method."""

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        The Parser will invoke this method once for each entity
        skipped. Non-validating processors may skip entities if they
        have not seen the declarations (because, for example, the
        entity was declared in an external DTD subset). All processors
        may skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties."""
 | 
						|
 | 
						|
 | 
						|
# ===== DTDHandler =====
 | 
						|
 | 
						|
class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes). Both default
    implementations do nothing."""

    def notationDecl(self, name, publicId, systemId):
        "Handle a notation declaration event."

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Handle an unparsed entity declaration event."
 | 
						|
 | 
						|
 | 
						|
# ===== ENTITYRESOLVER =====
 | 
						|
 | 
						|
class EntityResolver:
    """Basic interface for resolving entities.

    If you create an object implementing this interface and register
    it with your Parser, the parser will call the method in your
    object to resolve all external entities. Note that DefaultHandler
    implements this interface with the default behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        The default implementation ignores publicId and returns
        systemId unchanged."""
        return systemId
 | 
						|
 | 
						|
 | 
						|
#============================================================================
 | 
						|
#
 | 
						|
# CORE FEATURES
 | 
						|
#
 | 
						|
#============================================================================
 | 
						|
 | 
						|
# Each feature name is a URI string. The comments after each constant
# describe the meaning of the true/false settings and when the feature may
# be read or written.

feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every feature name defined above, collected in one list.
all_features = [feature_namespaces,
                feature_namespace_prefixes,
                feature_string_interning,
                feature_validation,
                feature_external_ges,
                feature_external_pes]
 | 
						|
 | 
						|
 | 
						|
#============================================================================
 | 
						|
#
 | 
						|
# CORE PROPERTIES
 | 
						|
#
 | 
						|
#============================================================================
 | 
						|
 | 
						|
# Each property name is a URI string. The comments after each constant
# describe the expected data type, the purpose of the property, and when it
# may be read or written.

property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access: read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
#              the current event.
# access: read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None
#

# Every property name defined above, collected in one list. The order
# differs from the definition order (dom-node precedes declaration-handler)
# and is kept as is in case callers depend on it.
all_properties = [property_lexical_handler,
                  property_dom_node,
                  property_declaration_handler,
                  property_xml_string,
                  property_encoding,
                  property_interning_dict]
 |