mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			171 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{\module{multifile} ---
 | 
						|
         Support for files containing distinct parts}
 | 
						|
 | 
						|
\declaremodule{standard}{multifile}
 | 
						|
\modulesynopsis{Support for reading files which contain distinct
 | 
						|
                parts, such as some MIME data.}
 | 
						|
\sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | 
						|
 | 
						|
 | 
						|
The \class{MultiFile} object enables you to treat sections of a text
 | 
						|
file as file-like input objects, with \code{''} being returned by
 | 
						|
\method{readline()} when a given delimiter pattern is encountered.  The
 | 
						|
defaults of this class are designed to make it useful for parsing
 | 
						|
MIME multipart messages, but by subclassing it and overriding methods 
 | 
						|
it can be easily adapted for more general use.
 | 
						|
 | 
						|
\begin{classdesc}{MultiFile}{fp\optional{, seekable}}
 | 
						|
Create a multi-file.  You must instantiate this class with an input
 | 
						|
object argument for the \class{MultiFile} instance to get lines from,
 | 
						|
such as a file object returned by \function{open()}.
 | 
						|
 | 
						|
\class{MultiFile} only ever looks at the input object's
 | 
						|
\method{readline()}, \method{seek()} and \method{tell()} methods, and
 | 
						|
the latter two are only needed if you want random access to the
 | 
						|
individual MIME parts. To use \class{MultiFile} on a non-seekable
 | 
						|
stream object, set the optional \var{seekable} argument to false; this
 | 
						|
will prevent using the input object's \method{seek()} and
 | 
						|
\method{tell()} methods.
 | 
						|
\end{classdesc}
 | 
						|
 | 
						|
It will be useful to know that in \class{MultiFile}'s view of the world, text
 | 
						|
is composed of three kinds of lines: data, section-dividers, and
 | 
						|
end-markers.  \class{MultiFile} is designed to support parsing of
 | 
						|
messages that may have multiple nested message parts, each with its
 | 
						|
own pattern for section-divider and end-marker lines.
 | 
						|
 | 
						|
\begin{seealso}
 | 
						|
  \seemodule{email}{Comprehensive email handling package; supersedes
 | 
						|
                    the \module{multifile} module.}
 | 
						|
\end{seealso}
 | 
						|
 | 
						|
 | 
						|
\subsection{MultiFile Objects \label{MultiFile-objects}}
 | 
						|
 | 
						|
A \class{MultiFile} instance has the following methods:
 | 
						|
 | 
						|
\begin{methoddesc}{readline}{}
 | 
						|
Read a line.  If the line is data (not a section-divider or end-marker
 | 
						|
or real EOF) return it.  If the line matches the most-recently-stacked
 | 
						|
boundary, return \code{''} and set \code{self.last} to 1 or 0 according as
 | 
						|
the match is or is not an end-marker.  If the line matches any other
 | 
						|
stacked boundary, raise an error.  On encountering end-of-file on the
 | 
						|
underlying stream object, the method raises \exception{Error} unless
 | 
						|
all boundaries have been popped.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{readlines}{}
 | 
						|
Return all lines remaining in this part as a list of strings.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{read}{}
 | 
						|
Read all lines, up to the next section.  Return them as a single
 | 
						|
(multiline) string.  Note that this doesn't take a size argument!
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{seek}{pos\optional{, whence}}
 | 
						|
Seek.  Seek indices are relative to the start of the current section.
 | 
						|
The \var{pos} and \var{whence} arguments are interpreted as for a file
 | 
						|
seek.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{tell}{}
 | 
						|
Return the file position relative to the start of the current section.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{next}{}
 | 
						|
Skip lines to the next section (that is, read lines until a
 | 
						|
section-divider or end-marker has been consumed).  Return true if
 | 
						|
there is such a section, false if an end-marker is seen.  Re-enable
 | 
						|
the most-recently-pushed boundary.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{is_data}{str}
 | 
						|
Return true if \var{str} is data and false if it might be a section
 | 
						|
boundary.  As written, it tests for a prefix other than \code{'-}\code{-'} at
 | 
						|
start of line (which all MIME boundaries have) but it is declared so
 | 
						|
it can be overridden in derived classes.
 | 
						|
 | 
						|
Note that this test is intended as a fast guard for the real
 | 
						|
boundary tests; if it always returns false it will merely slow
 | 
						|
processing, not cause it to fail.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{push}{str}
 | 
						|
Push a boundary string.  When an appropriately decorated version of
 | 
						|
this boundary is found as an input line, it will be interpreted as a
 | 
						|
section-divider or end-marker.  All subsequent
 | 
						|
reads will return the empty string to indicate end-of-file, until a
 | 
						|
call to \method{pop()} removes the boundary or a \method{next()} call
 | 
						|
reenables it.
 | 
						|
 | 
						|
It is possible to push more than one boundary.  Encountering the
 | 
						|
most-recently-pushed boundary will return EOF; encountering any other
 | 
						|
boundary will raise an error.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{pop}{}
 | 
						|
Pop a section boundary.  This boundary will no longer be interpreted
 | 
						|
as EOF.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{section_divider}{str}
 | 
						|
Turn a boundary into a section-divider line.  By default, this
 | 
						|
method prepends \code{'-}\code{-'} (which MIME section boundaries have) but
 | 
						|
it is declared so it can be overridden in derived classes.  This
 | 
						|
method need not append LF or CR-LF, as comparison with the result
 | 
						|
ignores trailing whitespace. 
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{end_marker}{str}
 | 
						|
Turn a boundary string into an end-marker line.  By default, this
 | 
						|
method prepends \code{'-}\code{-'} and appends \code{'-}\code{-'} (like a
 | 
						|
MIME-multipart end-of-message marker) but it is declared so it can be
 | 
						|
overridden in derived classes.  This method need not append LF or
 | 
						|
CR-LF, as comparison with the result ignores trailing whitespace.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
Finally, \class{MultiFile} instances have two public instance variables:
 | 
						|
 | 
						|
\begin{memberdesc}{level}
 | 
						|
Nesting depth of the current part.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{last}
 | 
						|
True if the last end-of-file was for an end-of-message marker. 
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
 | 
						|
\subsection{\class{MultiFile} Example \label{multifile-example}}
 | 
						|
\sectionauthor{Skip Montanaro}{skip@mojam.com}
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
import mimetools
 | 
						|
import multifile
 | 
						|
import StringIO
 | 
						|
 | 
						|
def extract_mime_part_matching(stream, mimetype):
 | 
						|
    """Return the first element in a multipart MIME message on stream
 | 
						|
    matching mimetype."""
 | 
						|
 | 
						|
    msg = mimetools.Message(stream)
 | 
						|
    msgtype = msg.gettype()
 | 
						|
    params = msg.getplist()
 | 
						|
 | 
						|
    data = StringIO.StringIO()
 | 
						|
    if msgtype[:10] == "multipart/":
 | 
						|
 | 
						|
        file = multifile.MultiFile(stream)
 | 
						|
        file.push(msg.getparam("boundary"))
 | 
						|
        while file.next():
 | 
						|
            submsg = mimetools.Message(file)
 | 
						|
            try:
 | 
						|
                data = StringIO.StringIO()
 | 
						|
                mimetools.decode(file, data, submsg.getencoding())
 | 
						|
            except ValueError:
 | 
						|
                continue
 | 
						|
            if submsg.gettype() == mimetype:
 | 
						|
                break
 | 
						|
        file.pop()
 | 
						|
    return data.getvalue()
 | 
						|
\end{verbatim}
 |