mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 02:15:10 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			171 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \section{\module{multifile} ---
 | |
|          Support for files containing distinct parts}
 | |
| 
 | |
| \declaremodule{standard}{multifile}
 | |
| \modulesynopsis{Support for reading files which contain distinct
 | |
|                 parts, such as some MIME data.}
 | |
| \sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | |
| 
 | |
| 
 | |
| The \class{MultiFile} object enables you to treat sections of a text
 | |
| file as file-like input objects, with \code{''} being returned by
 | |
| \method{readline()} when a given delimiter pattern is encountered.  The
 | |
| defaults of this class are designed to make it useful for parsing
 | |
| MIME multipart messages, but by subclassing it and overriding methods 
 | |
| it can be easily adapted for more general use.
 | |
| 
 | |
| \begin{classdesc}{MultiFile}{fp\optional{, seekable}}
 | |
| Create a multi-file.  You must instantiate this class with an input
 | |
| object argument for the \class{MultiFile} instance to get lines from,
 | |
| such as a file object returned by \function{open()}.
 | |
| 
 | |
| \class{MultiFile} only ever looks at the input object's
 | |
| \method{readline()}, \method{seek()} and \method{tell()} methods, and
 | |
| the latter two are only needed if you want random access to the
 | |
| individual MIME parts. To use \class{MultiFile} on a non-seekable
 | |
| stream object, set the optional \var{seekable} argument to false; this
 | |
| will prevent using the input object's \method{seek()} and
 | |
| \method{tell()} methods.
 | |
| \end{classdesc}
 | |
| 
 | |
| It will be useful to know that in \class{MultiFile}'s view of the world, text
 | |
| is composed of three kinds of lines: data, section-dividers, and
 | |
| end-markers.  MultiFile is designed to support parsing of
 | |
| messages that may have multiple nested message parts, each with its
 | |
| own pattern for section-divider and end-marker lines.
 | |
| 
 | |
| \begin{seealso}
 | |
|   \seemodule{email}{Comprehensive email handling package; supercedes
 | |
|                     the \module{multifile} module.}
 | |
| \end{seealso}
 | |
| 
 | |
| 
 | |
| \subsection{MultiFile Objects \label{MultiFile-objects}}
 | |
| 
 | |
| A \class{MultiFile} instance has the following methods:
 | |
| 
 | |
| \begin{methoddesc}{readline}{str}
 | |
| Read a line.  If the line is data (not a section-divider or end-marker
 | |
| or real EOF) return it.  If the line matches the most-recently-stacked
 | |
| boundary, return \code{''} and set \code{self.last} to 1 or 0 according as
 | |
| the match is or is not an end-marker.  If the line matches any other
 | |
| stacked boundary, raise an error.  On encountering end-of-file on the
 | |
| underlying stream object, the method raises \exception{Error} unless
 | |
| all boundaries have been popped.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{readlines}{str}
 | |
| Return all lines remaining in this part as a list of strings.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{read}{}
 | |
| Read all lines, up to the next section.  Return them as a single
 | |
| (multiline) string.  Note that this doesn't take a size argument!
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{seek}{pos\optional{, whence}}
 | |
| Seek.  Seek indices are relative to the start of the current section.
 | |
| The \var{pos} and \var{whence} arguments are interpreted as for a file
 | |
| seek.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{tell}{}
 | |
| Return the file position relative to the start of the current section.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{next}{}
 | |
| Skip lines to the next section (that is, read lines until a
 | |
| section-divider or end-marker has been consumed).  Return true if
 | |
| there is such a section, false if an end-marker is seen.  Re-enable
 | |
| the most-recently-pushed boundary.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{is_data}{str}
 | |
| Return true if \var{str} is data and false if it might be a section
 | |
| boundary.  As written, it tests for a prefix other than \code{'-}\code{-'} at
 | |
| start of line (which all MIME boundaries have) but it is declared so
 | |
| it can be overridden in derived classes.
 | |
| 
 | |
| Note that this test is used intended as a fast guard for the real
 | |
| boundary tests; if it always returns false it will merely slow
 | |
| processing, not cause it to fail.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{push}{str}
 | |
| Push a boundary string.  When an appropriately decorated version of
 | |
| this boundary is found as an input line, it will be interpreted as a
 | |
| section-divider or end-marker.  All subsequent
 | |
| reads will return the empty string to indicate end-of-file, until a
 | |
| call to \method{pop()} removes the boundary a or \method{next()} call
 | |
| reenables it.
 | |
| 
 | |
| It is possible to push more than one boundary.  Encountering the
 | |
| most-recently-pushed boundary will return EOF; encountering any other
 | |
| boundary will raise an error.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{pop}{}
 | |
| Pop a section boundary.  This boundary will no longer be interpreted
 | |
| as EOF.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{section_divider}{str}
 | |
| Turn a boundary into a section-divider line.  By default, this
 | |
| method prepends \code{'-}\code{-'} (which MIME section boundaries have) but
 | |
| it is declared so it can be overridden in derived classes.  This
 | |
| method need not append LF or CR-LF, as comparison with the result
 | |
| ignores trailing whitespace. 
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{end_marker}{str}
 | |
| Turn a boundary string into an end-marker line.  By default, this
 | |
| method prepends \code{'-}\code{-'} and appends \code{'-}\code{-'} (like a
 | |
| MIME-multipart end-of-message marker) but it is declared so it can be
 | |
| overridden in derived classes.  This method need not append LF or
 | |
| CR-LF, as comparison with the result ignores trailing whitespace.
 | |
| \end{methoddesc}
 | |
| 
 | |
| Finally, \class{MultiFile} instances have two public instance variables:
 | |
| 
 | |
| \begin{memberdesc}{level}
 | |
| Nesting depth of the current part.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{last}
 | |
| True if the last end-of-file was for an end-of-message marker. 
 | |
| \end{memberdesc}
 | |
| 
 | |
| 
 | |
| \subsection{\class{MultiFile} Example \label{multifile-example}}
 | |
| \sectionauthor{Skip Montanaro}{skip@mojam.com}
 | |
| 
 | |
| \begin{verbatim}
 | |
| import mimetools
 | |
| import multifile
 | |
| import StringIO
 | |
| 
 | |
| def extract_mime_part_matching(stream, mimetype):
 | |
|     """Return the first element in a multipart MIME message on stream
 | |
|     matching mimetype."""
 | |
| 
 | |
|     msg = mimetools.Message(stream)
 | |
|     msgtype = msg.gettype()
 | |
|     params = msg.getplist()
 | |
| 
 | |
|     data = StringIO.StringIO()
 | |
|     if msgtype[:10] == "multipart/":
 | |
| 
 | |
|         file = multifile.MultiFile(stream)
 | |
|         file.push(msg.getparam("boundary"))
 | |
|         while file.next():
 | |
|             submsg = mimetools.Message(file)
 | |
|             try:
 | |
|                 data = StringIO.StringIO()
 | |
|                 mimetools.decode(file, data, submsg.getencoding())
 | |
|             except ValueError:
 | |
|                 continue
 | |
|             if submsg.gettype() == mimetype:
 | |
|                 break
 | |
|         file.pop()
 | |
|     return data.getvalue()
 | |
| \end{verbatim}
 | 
