mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	in the running text.
For computed attribute and method names (where there's a \var{} part to
the name), use the non-indexing forms of \datadesc{} and \funcdesc{}.
This doesn't change the printed output, but removes 3 rejections from the
makeindex run and allows the LaTeX2HTML support to exclude these from the
index.
		
	
			
		
			
				
	
	
		
			221 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			221 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{Standard Module \sectcode{xmllib}}
 | 
						|
% Author: Sjoerd Mullender
 | 
						|
\label{module-xmllib}
 | 
						|
\stmodindex{xmllib}
 | 
						|
\index{XML}
 | 
						|
 | 
						|
This module defines a class \code{XMLParser} which serves as the basis 
 | 
						|
for parsing text files formatted in XML (eXtensible Markup Language).
 | 
						|
 | 
						|
The \code{XMLParser} class must be instantiated without arguments.  It 
 | 
						|
has the following interface methods:
 | 
						|
 | 
						|
\setindexsubitem{(XMLParser method)}
 | 
						|
 | 
						|
\begin{funcdesc}{reset}{}
 | 
						|
Reset the instance.  Loses all unprocessed data.  This is called
 | 
						|
implicitly at instantiation time.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{setnomoretags}{}
 | 
						|
Stop processing tags.  Treat all following input as literal input
 | 
						|
(CDATA).
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{setliteral}{}
 | 
						|
Enter literal mode (CDATA mode).
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{feed}{data}
 | 
						|
Feed some text to the parser.  It is processed insofar as it consists
 | 
						|
of complete elements; incomplete data is buffered until more data is
 | 
						|
fed or \code{close()} is called.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{close}{}
 | 
						|
Force processing of all buffered data as if it were followed by an
 | 
						|
end-of-file mark.  This method may be redefined by a derived class to
 | 
						|
define additional processing at the end of the input, but the
 | 
						|
redefined version should always call \code{XMLParser.close()}.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{translate_references}{data}
 | 
						|
Translate all entity and character references in \code{data} and
 | 
						|
return the translated string.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_xml}{encoding\, standalone}
 | 
						|
This method is called when the \code{<?xml ...?>} tag is processed.
 | 
						|
The arguments are the values of the encoding and standalone attributes 
 | 
						|
in the tag.  Both encoding and standalone are optional.  The values
 | 
						|
passed to \code{handle_xml} default to \code{None} and the string
 | 
						|
\code{'no'} respectively.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_doctype}{tag\, data}
 | 
						|
This method is called when the \code{<!DOCTYPE...>} tag is processed.
 | 
						|
The arguments are the name of the root element and the uninterpreted
 | 
						|
contents of the tag, starting after the white space after the name of
 | 
						|
the root element.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_starttag}{tag\, method\, attributes}
 | 
						|
This method is called to handle start tags for which a
 | 
						|
\code{start_\var{tag}()} method has been defined.  The \code{tag}
 | 
						|
argument is the name of the tag, and the \code{method} argument is the
 | 
						|
bound method which should be used to support semantic interpretation
 | 
						|
of the start tag.  The \var{attributes} argument is a dictionary of
 | 
						|
attributes, the key being the \var{name} and the value being the
 | 
						|
\var{value} of the attribute found inside the tag's \code{<>} brackets.
 | 
						|
Character and entity references in the \var{value} have
 | 
						|
been interpreted.  For instance, for the tag
 | 
						|
\code{<A HREF="http://www.cwi.nl/">}, this method would be called as
 | 
						|
\code{handle_starttag('A', self.start_A, \{'HREF': 'http://www.cwi.nl/'\})}.
 | 
						|
The base implementation simply calls \code{method} with \code{attributes}
 | 
						|
as the only argument.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_endtag}{tag\, method}
 | 
						|
This method is called to handle end tags for which an
 | 
						|
\code{end_\var{tag}()} method has been defined.  The \code{tag}
 | 
						|
argument is the name of the tag, and the
 | 
						|
\code{method} argument is the bound method which should be used to
 | 
						|
support semantic interpretation of the end tag.  If no
 | 
						|
\code{end_\var{tag}()} method is defined for the closing element, this
 | 
						|
handler is not called.  The base implementation simply calls
 | 
						|
\code{method}.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_data}{data}
 | 
						|
This method is called to process arbitrary data.  It is intended to be
 | 
						|
overridden by a derived class; the base class implementation does
 | 
						|
nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_charref}{ref}
 | 
						|
This method is called to process a character reference of the form
 | 
						|
\samp{\&\#\var{ref};}.  \var{ref} can either be a decimal number,
 | 
						|
or a hexadecimal number when preceded by \code{x}.
 | 
						|
In the base implementation, \var{ref} must be a number in the
 | 
						|
range 0--255.  It translates the character to \ASCII{} and calls the
 | 
						|
method \code{handle_data()} with the character as argument.  If
 | 
						|
\var{ref} is invalid or out of range, the method
 | 
						|
\code{unknown_charref(\var{ref})} is called to handle the error.  A
 | 
						|
subclass must override this method to provide support for character
 | 
						|
references outside of the \ASCII{} range.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_entityref}{ref}
 | 
						|
This method is called to process a general entity reference of the form
 | 
						|
\samp{\&\var{ref};} where \var{ref} is a general entity
 | 
						|
reference.  It looks for \var{ref} in the instance (or class)
 | 
						|
variable \code{entitydefs} which should be a mapping from entity names
 | 
						|
to corresponding translations.
 | 
						|
If a translation is found, it calls the method \code{handle_data()}
 | 
						|
with the translation; otherwise, it calls the method
 | 
						|
\code{unknown_entityref(\var{ref})}.  The default \code{entitydefs}
 | 
						|
defines translations for \code{\&amp;}, \code{\&apos;}, \code{\&gt;},
 | 
						|
\code{\&lt;}, and \code{\&quot;}.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_comment}{comment}
 | 
						|
This method is called when a comment is encountered.  The
 | 
						|
\code{comment} argument is a string containing the text between the
 | 
						|
\samp{<!--} and \samp{-->} delimiters, but not the delimiters
 | 
						|
themselves.  For example, the comment \samp{<!--text-->} will
 | 
						|
cause this method to be called with the argument \code{'text'}.  The
 | 
						|
default method does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_cdata}{data}
 | 
						|
This method is called when a CDATA element is encountered.  The
 | 
						|
\code{data} argument is a string containing the text between the
 | 
						|
\samp{<![CDATA[} and \samp{]]>} delimiters, but not the delimiters
 | 
						|
themselves.  For example, the entity \samp{<![CDATA[text]]>} will
 | 
						|
cause this method to be called with the argument \code{'text'}.  The
 | 
						|
default method does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_proc}{name\, data}
 | 
						|
This method is called when a processing instruction (PI) is encountered.  The
 | 
						|
\code{name} is the PI target, and the \code{data} argument is a
 | 
						|
string containing the text between the PI target and the closing delimiter,
 | 
						|
but not the delimiter itself.  For example, the instruction
 | 
						|
\samp{<?XML text?>} will cause this method to be called with the
 | 
						|
arguments \code{'XML'} and \code{'text'}.  The default method does
 | 
						|
nothing.  Note that if a document starts with a \code{<?xml ...?>}
 | 
						|
tag, \code{handle_xml} is called to handle it.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{handle_special}{data}
 | 
						|
This method is called when a declaration is encountered.  The
 | 
						|
\code{data} argument is a string containing the text between the
 | 
						|
\samp{<!} and \samp{>} delimiters, but not the delimiters
 | 
						|
themselves.  For example, the entity \samp{<!ENTITY text>} will
 | 
						|
cause this method to be called with the argument \code{'ENTITY text'}.  The
 | 
						|
default method does nothing.  Note that \code{<!DOCTYPE ...>} is
 | 
						|
handled separately if it is located at the start of the document.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{syntax_error}{message}
 | 
						|
This method is called when a syntax error is encountered.  The
 | 
						|
\code{message} is a description of what was wrong.  The default method 
 | 
						|
raises a \code{RuntimeError} exception.  If this method is overridden, 
 | 
						|
it is permissible for it to return.  This method is only called when
 | 
						|
the error can be recovered from.  Unrecoverable errors raise a
 | 
						|
\code{RuntimeError} without first calling \code{syntax_error}.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{unknown_starttag}{tag\, attributes}
 | 
						|
This method is called to process an unknown start tag.  It is intended
 | 
						|
to be overridden by a derived class; the base class implementation
 | 
						|
does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{unknown_endtag}{tag}
 | 
						|
This method is called to process an unknown end tag.  It is intended
 | 
						|
to be overridden by a derived class; the base class implementation
 | 
						|
does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{unknown_charref}{ref}
 | 
						|
This method is called to process unresolvable numeric character
 | 
						|
references.  It is intended to be overridden by a derived class; the
 | 
						|
base class implementation does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{unknown_entityref}{ref}
 | 
						|
This method is called to process an unknown entity reference.  It is
 | 
						|
intended to be overridden by a derived class; the base class
 | 
						|
implementation does nothing.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
Apart from overriding or extending the methods listed above, derived
 | 
						|
classes may also define methods and variables of the following form to
 | 
						|
define processing of specific tags.  Tag names in the input stream are
 | 
						|
case dependent; the \var{tag} occurring in method names must be in the
 | 
						|
correct case:
 | 
						|
 | 
						|
\begin{funcdescni}{start_\var{tag}}{attributes}
 | 
						|
This method is called to process an opening tag \var{tag}.  The
 | 
						|
\var{attributes} argument has the same meaning as described for
 | 
						|
\code{handle_starttag()} above.  In fact, the base implementation of
 | 
						|
\code{handle_starttag()} calls this method.
 | 
						|
\end{funcdescni}
 | 
						|
 | 
						|
\begin{funcdescni}{end_\var{tag}}{}
 | 
						|
This method is called to process a closing tag \var{tag}.
 | 
						|
\end{funcdescni}
 | 
						|
 | 
						|
\begin{datadescni}{\var{tag}_attributes}
 | 
						|
If a class or instance variable \code{\var{tag}_attributes} exists, it 
 | 
						|
should be a list or a dictionary.  If a list, the elements of the list 
 | 
						|
are the valid attributes for the element \var{tag}; if a dictionary,
 | 
						|
the keys are the valid attributes for the element \var{tag}, and the
 | 
						|
values the default values of the attributes, or \code{None} if there
 | 
						|
is no default.
 | 
						|
In addition to the attributes that were present in the tag, the
 | 
						|
attribute dictionary that is passed to \code{handle_starttag()} and
 | 
						|
\code{unknown_starttag()} contains values for all attributes that have a
 | 
						|
default value.
 | 
						|
\end{datadescni}
 |