mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			185 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{\module{shlex} ---
 | 
						|
         Simple lexical analysis}
 | 
						|
 | 
						|
\declaremodule{standard}{shlex}
 | 
						|
\modulesynopsis{Simple lexical analysis for \UNIX\ shell-like languages.}
 | 
						|
\moduleauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | 
						|
\sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | 
						|
 | 
						|
\versionadded{1.5.2}
 | 
						|
 | 
						|
The \class{shlex} class makes it easy to write lexical analyzers for
 | 
						|
simple syntaxes resembling that of the \UNIX{} shell.  This will often
 | 
						|
be useful for writing minilanguages, e.g.\ in run control files for
 | 
						|
Python applications.
 | 
						|
 | 
						|
\begin{classdesc}{shlex}{\optional{stream\optional{, file}}}
 | 
						|
A \class{shlex} instance or subclass instance is a lexical analyzer
 | 
						|
object.  The initialization argument, if present, specifies where to
 | 
						|
read characters from. It must be a file- or stream-like object with
 | 
						|
\method{read()} and \method{readline()} methods.  If no argument is given,
 | 
						|
input will be taken from \code{sys.stdin}.  The second optional 
 | 
						|
argument is a filename string, which sets the initial value of the
 | 
						|
\member{infile} member.  If the stream argument is omitted or
 | 
						|
equal to \code{sys.stdin}, this second argument defaults to ``stdin''.
 | 
						|
\end{classdesc}
 | 
						|
 | 
						|
 | 
						|
\begin{seealso}
 | 
						|
  \seemodule{ConfigParser}{Parser for configuration files similar to the
 | 
						|
                           Windows \file{.ini} files.}
 | 
						|
\end{seealso}
 | 
						|
 | 
						|
 | 
						|
\subsection{shlex Objects \label{shlex-objects}}
 | 
						|
 | 
						|
A \class{shlex} instance has the following methods:
 | 
						|
 | 
						|
 | 
						|
\begin{methoddesc}{get_token}{}
 | 
						|
Return a token.  If tokens have been stacked using
 | 
						|
\method{push_token()}, pop a token off the stack.  Otherwise, read one
 | 
						|
from the input stream.  If reading encounters an immediate
 | 
						|
end-of-file, an empty string is returned. 
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{push_token}{str}
 | 
						|
Push the argument onto the token stack.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{read_token}{}
 | 
						|
Read a raw token.  Ignore the pushback stack, and do not interpret source
 | 
						|
requests.  (This is not ordinarily a useful entry point, and is
 | 
						|
documented here only for the sake of completeness.)
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{sourcehook}{filename}
 | 
						|
When \class{shlex} detects a source request (see
 | 
						|
\member{source} below), this method is given the following token as
 | 
						|
argument, and expected to return a tuple consisting of a filename and
 | 
						|
an open file-like object.
 | 
						|
 | 
						|
Normally, this method first strips any quotes off the argument.  If
 | 
						|
the result is an absolute pathname, or there was no previous source
 | 
						|
request in effect, or the previous source was a stream
 | 
						|
(e.g.\ \code{sys.stdin}), the result is left alone.  Otherwise, if the
 | 
						|
result is a relative pathname, the directory part of the name of the
 | 
						|
file immediately before it on the source inclusion stack is prepended
 | 
						|
(this behavior is like the way the C preprocessor handles
 | 
						|
\code{\#include "file.h"}).
 | 
						|
 | 
						|
The result of the manipulations is treated as a filename, and returned
 | 
						|
as the first component of the tuple, with
 | 
						|
\function{open()} called on it to yield the second component. (Note:
 | 
						|
this is the reverse of the order of arguments in instance initialization!)
 | 
						|
 | 
						|
This hook is exposed so that you can use it to implement directory
 | 
						|
search paths, addition of file extensions, and other namespace hacks.
 | 
						|
There is no corresponding `close' hook, but a \class{shlex} instance will call
 | 
						|
the \method{close()} method of the sourced input stream when it
 | 
						|
returns \EOF.
 | 
						|
 | 
						|
For more explicit control of source stacking, use the
 | 
						|
\method{push_source()} and \method{pop_source()} methods. 
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{push_source}{stream\optional{, filename}}
 | 
						|
Push an input source stream onto the input stack.  If the filename
 | 
						|
argument is specified it will later be available for use in error
 | 
						|
messages.  This is the same method used internally by the
 | 
						|
\method{sourcehook()} method.
 | 
						|
\versionadded{2.1}
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{pop_source}{}
 | 
						|
Pop the last-pushed input source from the input stack.
 | 
						|
This is the same method used internally when the lexer reaches
 | 
						|
\EOF{} on a stacked input stream.
 | 
						|
\versionadded{2.1}
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
\begin{methoddesc}{error_leader}{\optional{file\optional{, line}}}
 | 
						|
This method generates an error message leader in the format of a
 | 
						|
\UNIX{} C compiler error label; the format is \code{'"\%s", line \%d: '},
 | 
						|
where the \samp{\%s} is replaced with the name of the current source
 | 
						|
file and the \samp{\%d} with the current input line number (the
 | 
						|
optional arguments can be used to override these).
 | 
						|
 | 
						|
This convenience is provided to encourage \module{shlex} users to
 | 
						|
generate error messages in the standard, parseable format understood
 | 
						|
by Emacs and other \UNIX{} tools.
 | 
						|
\end{methoddesc}
 | 
						|
 | 
						|
Instances of \class{shlex} subclasses have some public instance
 | 
						|
variables which either control lexical analysis or can be used for
 | 
						|
debugging:
 | 
						|
 | 
						|
\begin{memberdesc}{commenters}
 | 
						|
The string of characters that are recognized as comment beginners.
 | 
						|
All characters from the comment beginner to end of line are ignored.
 | 
						|
Includes just \character{\#} by default.   
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{wordchars}
 | 
						|
The string of characters that will accumulate into multi-character
 | 
						|
tokens.  By default, includes all \ASCII{} alphanumerics and
 | 
						|
underscore.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{whitespace}
 | 
						|
Characters that will be considered whitespace and skipped.  Whitespace
 | 
						|
bounds tokens.  By default, includes space, tab, linefeed and
 | 
						|
carriage-return.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{quotes}
 | 
						|
Characters that will be considered string quotes.  The token
 | 
						|
accumulates until the same quote is encountered again (thus, different
 | 
						|
quote types protect each other as in the shell).  By default, includes
 | 
						|
\ASCII{} single and double quotes.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{infile}
 | 
						|
The name of the current input file, as initially set at class
 | 
						|
instantiation time or stacked by later source requests.  It may
 | 
						|
be useful to examine this when constructing error messages.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{instream}
 | 
						|
The input stream from which this \class{shlex} instance is reading
 | 
						|
characters.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{source}
 | 
						|
This member is \code{None} by default.  If you assign a string to it,
 | 
						|
that string will be recognized as a lexical-level inclusion request
 | 
						|
similar to the \samp{source} keyword in various shells.  That is, the
 | 
						|
immediately following token will be opened as a filename and input taken
 | 
						|
from that stream until \EOF, at which point the \method{close()}
 | 
						|
method of that stream will be called and the input source will again
 | 
						|
become the original input stream. Source requests may be stacked any
 | 
						|
number of levels deep.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{debug}
 | 
						|
If this member is numeric and \code{1} or more, a \class{shlex}
 | 
						|
instance will print verbose progress output on its behavior.  If you
 | 
						|
need to use this, you can read the module source code to learn the
 | 
						|
details.
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
Note that any character not declared to be a word character,
 | 
						|
whitespace, or a quote will be returned as a single-character token.
 | 
						|
 | 
						|
Quote and comment characters are not recognized within words.  Thus,
 | 
						|
the bare words \samp{ain't} and \samp{ain\#t} would be returned as single
 | 
						|
tokens by the default parser.
 | 
						|
 | 
						|
\begin{memberdesc}{lineno}
 | 
						|
Source line number (count of newlines seen so far plus one).
 | 
						|
\end{memberdesc}
 | 
						|
 | 
						|
\begin{memberdesc}{token}
 | 
						|
The token buffer.  It may be useful to examine this when catching
 | 
						|
exceptions.
 | 
						|
\end{memberdesc}
 |