mirror of
https://github.com/python/cpython.git
synced 2025-09-25 01:43:11 +00:00
Patch 560023 adding docstrings. 2.2 Candidate (after verifying modules were not updated after 2.2).
This commit is contained in:
parent
d68f5171eb
commit
aef22fb9cd
9 changed files with 499 additions and 8 deletions
|
@ -1,8 +1,11 @@
|
||||||
"""Generic MIME writer.
|
"""Generic MIME writer.
|
||||||
|
|
||||||
Classes:
|
This module defines the class MimeWriter. The MimeWriter class implements
|
||||||
|
a basic formatter for creating MIME multi-part files. It doesn't seek around
|
||||||
MimeWriter - the only thing here.
|
the output file nor does it use large amounts of buffer space. You must write
|
||||||
|
the parts out in the order that they should occur in the final file.
|
||||||
|
MimeWriter does buffer the headers you add, allowing you to rearrange their
|
||||||
|
order.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -86,6 +89,14 @@ class MimeWriter:
|
||||||
self._headers = []
|
self._headers = []
|
||||||
|
|
||||||
def addheader(self, key, value, prefix=0):
|
def addheader(self, key, value, prefix=0):
|
||||||
|
"""Add a header line to the MIME message.
|
||||||
|
|
||||||
|
The key is the name of the header, where the value obviously provides
|
||||||
|
the value of the header. The optional argument prefix determines
|
||||||
|
where the header is inserted; 0 means append at the end, 1 means
|
||||||
|
insert at the start. The default is to append.
|
||||||
|
|
||||||
|
"""
|
||||||
lines = value.split("\n")
|
lines = value.split("\n")
|
||||||
while lines and not lines[-1]: del lines[-1]
|
while lines and not lines[-1]: del lines[-1]
|
||||||
while lines and not lines[0]: del lines[0]
|
while lines and not lines[0]: del lines[0]
|
||||||
|
@ -99,10 +110,26 @@ class MimeWriter:
|
||||||
self._headers.append(line)
|
self._headers.append(line)
|
||||||
|
|
||||||
def flushheaders(self):
|
def flushheaders(self):
|
||||||
|
"""Writes out and forgets all headers accumulated so far.
|
||||||
|
|
||||||
|
This is useful if you don't need a body part at all; for example,
|
||||||
|
for a subpart of type message/rfc822 that's (mis)used to store some
|
||||||
|
header-like information.
|
||||||
|
|
||||||
|
"""
|
||||||
self._fp.writelines(self._headers)
|
self._fp.writelines(self._headers)
|
||||||
self._headers = []
|
self._headers = []
|
||||||
|
|
||||||
def startbody(self, ctype, plist=[], prefix=1):
|
def startbody(self, ctype, plist=[], prefix=1):
|
||||||
|
"""Returns a file-like object for writing the body of the message.
|
||||||
|
|
||||||
|
The content-type is set to the provided ctype, and the optional
|
||||||
|
parameter, plist, provides additional parameters for the
|
||||||
|
content-type declaration. The optional argument prefix determines
|
||||||
|
where the header is inserted; 0 means append at the end, 1 means
|
||||||
|
insert at the start. The default is to insert at the start.
|
||||||
|
|
||||||
|
"""
|
||||||
for name, value in plist:
|
for name, value in plist:
|
||||||
ctype = ctype + ';\n %s=\"%s\"' % (name, value)
|
ctype = ctype + ';\n %s=\"%s\"' % (name, value)
|
||||||
self.addheader("Content-Type", ctype, prefix=prefix)
|
self.addheader("Content-Type", ctype, prefix=prefix)
|
||||||
|
@ -111,16 +138,42 @@ class MimeWriter:
|
||||||
return self._fp
|
return self._fp
|
||||||
|
|
||||||
def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
|
def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1):
|
||||||
|
"""Returns a file-like object for writing the body of the message.
|
||||||
|
|
||||||
|
Additionally, this method initializes the multi-part code, where the
|
||||||
|
subtype parameter provides the multipart subtype, the boundary
|
||||||
|
parameter may provide a user-defined boundary specification, and the
|
||||||
|
plist parameter provides optional parameters for the subtype. The
|
||||||
|
optional argument, prefix, determines where the header is inserted;
|
||||||
|
0 means append at the end, 1 means insert at the start. The default
|
||||||
|
is to insert at the start. Subparts should be created using the
|
||||||
|
nextpart() method.
|
||||||
|
|
||||||
|
"""
|
||||||
self._boundary = boundary or mimetools.choose_boundary()
|
self._boundary = boundary or mimetools.choose_boundary()
|
||||||
return self.startbody("multipart/" + subtype,
|
return self.startbody("multipart/" + subtype,
|
||||||
[("boundary", self._boundary)] + plist,
|
[("boundary", self._boundary)] + plist,
|
||||||
prefix=prefix)
|
prefix=prefix)
|
||||||
|
|
||||||
def nextpart(self):
|
def nextpart(self):
|
||||||
|
"""Returns a new instance of MimeWriter which represents an
|
||||||
|
individual part in a multipart message.
|
||||||
|
|
||||||
|
This may be used to write the part as well as used for creating
|
||||||
|
recursively complex multipart messages. The message must first be
|
||||||
|
initialized with the startmultipartbody() method before using the
|
||||||
|
nextpart() method.
|
||||||
|
|
||||||
|
"""
|
||||||
self._fp.write("\n--" + self._boundary + "\n")
|
self._fp.write("\n--" + self._boundary + "\n")
|
||||||
return self.__class__(self._fp)
|
return self.__class__(self._fp)
|
||||||
|
|
||||||
def lastpart(self):
|
def lastpart(self):
|
||||||
|
"""This is used to designate the last part of a multipart message.
|
||||||
|
|
||||||
|
It should always be used when writing multipart messages.
|
||||||
|
|
||||||
|
"""
|
||||||
self._fp.write("\n--" + self._boundary + "--\n")
|
self._fp.write("\n--" + self._boundary + "--\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
62
Lib/cmd.py
62
Lib/cmd.py
|
@ -53,6 +53,17 @@ PROMPT = '(Cmd) '
|
||||||
IDENTCHARS = string.ascii_letters + string.digits + '_'
|
IDENTCHARS = string.ascii_letters + string.digits + '_'
|
||||||
|
|
||||||
class Cmd:
|
class Cmd:
|
||||||
|
"""A simple framework for writing line-oriented command interpreters.
|
||||||
|
|
||||||
|
These are often useful for test harnesses, administrative tools, and
|
||||||
|
prototypes that will later be wrapped in a more sophisticated interface.
|
||||||
|
|
||||||
|
A Cmd instance or subclass instance is a line-oriented interpreter
|
||||||
|
framework. There is no good reason to instantiate Cmd itself; rather,
|
||||||
|
it's useful as a superclass of an interpreter class you define yourself
|
||||||
|
in order to inherit Cmd's methods and encapsulate action methods.
|
||||||
|
|
||||||
|
"""
|
||||||
prompt = PROMPT
|
prompt = PROMPT
|
||||||
identchars = IDENTCHARS
|
identchars = IDENTCHARS
|
||||||
ruler = '='
|
ruler = '='
|
||||||
|
@ -67,6 +78,14 @@ class Cmd:
|
||||||
use_rawinput = 1
|
use_rawinput = 1
|
||||||
|
|
||||||
def __init__(self, completekey='tab'):
|
def __init__(self, completekey='tab'):
|
||||||
|
"""Instantiate a line-oriented interpreter framework.
|
||||||
|
|
||||||
|
The optional argument is the readline name of a completion key;
|
||||||
|
it defaults to the Tab key. If completekey is not None and the
|
||||||
|
readline module is available, command completion is done
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
"""
|
||||||
if completekey:
|
if completekey:
|
||||||
try:
|
try:
|
||||||
import readline
|
import readline
|
||||||
|
@ -76,6 +95,12 @@ class Cmd:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def cmdloop(self, intro=None):
|
def cmdloop(self, intro=None):
|
||||||
|
"""Repeatedly issue a prompt, accept input, parse an initial prefix
|
||||||
|
off the received input, and dispatch to action methods, passing them
|
||||||
|
the remainder of the line as argument.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
self.preloop()
|
self.preloop()
|
||||||
if intro is not None:
|
if intro is not None:
|
||||||
self.intro = intro
|
self.intro = intro
|
||||||
|
@ -106,15 +131,25 @@ class Cmd:
|
||||||
self.postloop()
|
self.postloop()
|
||||||
|
|
||||||
def precmd(self, line):
|
def precmd(self, line):
|
||||||
|
"""Hook method executed just before the command line is
|
||||||
|
interpreted, but after the input prompt is generated and issued.
|
||||||
|
|
||||||
|
"""
|
||||||
return line
|
return line
|
||||||
|
|
||||||
def postcmd(self, stop, line):
|
def postcmd(self, stop, line):
|
||||||
|
"""Hook method executed just after a command dispatch is finished."""
|
||||||
return stop
|
return stop
|
||||||
|
|
||||||
def preloop(self):
|
def preloop(self):
|
||||||
|
"""Hook method executed once when the cmdloop() method is called."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def postloop(self):
|
def postloop(self):
|
||||||
|
"""Hook method executed once when the cmdloop() method is about to
|
||||||
|
return.
|
||||||
|
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def parseline(self, line):
|
def parseline(self, line):
|
||||||
|
@ -134,6 +169,15 @@ class Cmd:
|
||||||
return cmd, arg, line
|
return cmd, arg, line
|
||||||
|
|
||||||
def onecmd(self, line):
|
def onecmd(self, line):
|
||||||
|
"""Interpret the argument as though it had been typed in response
|
||||||
|
to the prompt.
|
||||||
|
|
||||||
|
This may be overridden, but should not normally need to be;
|
||||||
|
see the precmd() and postcmd() methods for useful execution hooks.
|
||||||
|
The return value is a flag indicating whether interpretation of
|
||||||
|
commands by the interpreter should stop.
|
||||||
|
|
||||||
|
"""
|
||||||
cmd, arg, line = self.parseline(line)
|
cmd, arg, line = self.parseline(line)
|
||||||
if not line:
|
if not line:
|
||||||
return self.emptyline()
|
return self.emptyline()
|
||||||
|
@ -150,13 +194,31 @@ class Cmd:
|
||||||
return func(arg)
|
return func(arg)
|
||||||
|
|
||||||
def emptyline(self):
|
def emptyline(self):
|
||||||
|
"""Called when an empty line is entered in response to the prompt.
|
||||||
|
|
||||||
|
If this method is not overridden, it repeats the last nonempty
|
||||||
|
command entered.
|
||||||
|
|
||||||
|
"""
|
||||||
if self.lastcmd:
|
if self.lastcmd:
|
||||||
return self.onecmd(self.lastcmd)
|
return self.onecmd(self.lastcmd)
|
||||||
|
|
||||||
def default(self, line):
|
def default(self, line):
|
||||||
|
"""Called on an input line when the command prefix is not recognized.
|
||||||
|
|
||||||
|
If this method is not overridden, it prints an error message and
|
||||||
|
returns.
|
||||||
|
|
||||||
|
"""
|
||||||
print '*** Unknown syntax:', line
|
print '*** Unknown syntax:', line
|
||||||
|
|
||||||
def completedefault(self, *ignored):
|
def completedefault(self, *ignored):
|
||||||
|
"""Method called to complete an input line when no command-specific
|
||||||
|
complete_*() method is available.
|
||||||
|
|
||||||
|
By default, it returns an empty list.
|
||||||
|
|
||||||
|
"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def completenames(self, text, *ignored):
|
def completenames(self, text, *ignored):
|
||||||
|
|
|
@ -154,5 +154,17 @@ class _Database:
|
||||||
|
|
||||||
|
|
||||||
def open(file, flag=None, mode=0666):
|
def open(file, flag=None, mode=0666):
|
||||||
|
"""Open the database named by file and return the corresponding object.
|
||||||
|
|
||||||
|
The flag argument, used to control how the database is opened in the
|
||||||
|
other DBM implementations, is ignored in the dumbdbm module; the
|
||||||
|
database is always opened for update, and will be created if it does
|
||||||
|
not exist.
|
||||||
|
|
||||||
|
The optional mode argument is the UNIX mode of the file, used only when
|
||||||
|
the database has to be created. It defaults to octal code 0666 (and
|
||||||
|
will be modified by the prevailing umask).
|
||||||
|
|
||||||
|
"""
|
||||||
# the flag argument is currently ignored; mode is passed on to _Database
|
# the flag argument is currently ignored; mode is passed on to _Database
|
||||||
return _Database(file, mode)
|
return _Database(file, mode)
|
||||||
|
|
|
@ -27,6 +27,15 @@ AS_IS = None
|
||||||
|
|
||||||
|
|
||||||
class NullFormatter:
|
class NullFormatter:
|
||||||
|
"""A formatter which does nothing.
|
||||||
|
|
||||||
|
If the writer parameter is omitted, a NullWriter instance is created.
|
||||||
|
No methods of the writer are called by NullFormatter instances.
|
||||||
|
|
||||||
|
Implementations should inherit from this class if implementing a writer
|
||||||
|
interface but don't need to inherit any implementation.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, writer=None):
|
def __init__(self, writer=None):
|
||||||
if not writer:
|
if not writer:
|
||||||
|
@ -52,6 +61,13 @@ class NullFormatter:
|
||||||
|
|
||||||
|
|
||||||
class AbstractFormatter:
|
class AbstractFormatter:
|
||||||
|
"""The standard formatter.
|
||||||
|
|
||||||
|
This implementation has demonstrated wide applicability to many writers,
|
||||||
|
and may be used directly in most circumstances. It has been used to
|
||||||
|
implement a full-featured World Wide Web browser.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
# Space handling policy: blank spaces at the boundary between elements
|
# Space handling policy: blank spaces at the boundary between elements
|
||||||
# are handled by the outermost context. "Literal" data is not checked
|
# are handled by the outermost context. "Literal" data is not checked
|
||||||
|
@ -283,7 +299,13 @@ class AbstractFormatter:
|
||||||
|
|
||||||
|
|
||||||
class NullWriter:
|
class NullWriter:
|
||||||
"""Minimal writer interface to use in testing & inheritance."""
|
"""Minimal writer interface to use in testing & inheritance.
|
||||||
|
|
||||||
|
A writer which only provides the interface definition; no actions are
|
||||||
|
taken on any methods. This should be the base class for all writers
|
||||||
|
which do not need to inherit any implementation methods.
|
||||||
|
|
||||||
|
"""
|
||||||
def __init__(self): pass
|
def __init__(self): pass
|
||||||
def flush(self): pass
|
def flush(self): pass
|
||||||
def new_alignment(self, align): pass
|
def new_alignment(self, align): pass
|
||||||
|
@ -300,6 +322,12 @@ class NullWriter:
|
||||||
|
|
||||||
|
|
||||||
class AbstractWriter(NullWriter):
|
class AbstractWriter(NullWriter):
|
||||||
|
"""A writer which can be used in debugging formatters, but not much else.
|
||||||
|
|
||||||
|
Each method simply announces itself by printing its name and
|
||||||
|
arguments on standard output.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def new_alignment(self, align):
|
def new_alignment(self, align):
|
||||||
print "new_alignment(%s)" % `align`
|
print "new_alignment(%s)" % `align`
|
||||||
|
@ -336,6 +364,13 @@ class AbstractWriter(NullWriter):
|
||||||
|
|
||||||
|
|
||||||
class DumbWriter(NullWriter):
|
class DumbWriter(NullWriter):
|
||||||
|
"""Simple writer class which writes output on the file object passed in
|
||||||
|
as the file parameter or, if file is omitted, on standard output. The
|
||||||
|
output is simply word-wrapped to the number of columns specified by
|
||||||
|
the maxcol parameter. This class is suitable for reflowing a sequence
|
||||||
|
of paragraphs.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, file=None, maxcol=72):
|
def __init__(self, file=None, maxcol=72):
|
||||||
self.file = file or sys.stdout
|
self.file = file or sys.stdout
|
||||||
|
|
38
Lib/gzip.py
38
Lib/gzip.py
|
@ -27,14 +27,52 @@ def read32(input):
|
||||||
return struct.unpack("<l", input.read(4))[0]
|
return struct.unpack("<l", input.read(4))[0]
|
||||||
|
|
||||||
def open(filename, mode="rb", compresslevel=9):
|
def open(filename, mode="rb", compresslevel=9):
|
||||||
|
"""Shorthand for GzipFile(filename, mode, compresslevel).
|
||||||
|
|
||||||
|
The filename argument is required; mode defaults to 'rb'
|
||||||
|
and compresslevel defaults to 9.
|
||||||
|
|
||||||
|
"""
|
||||||
return GzipFile(filename, mode, compresslevel)
|
return GzipFile(filename, mode, compresslevel)
|
||||||
|
|
||||||
class GzipFile:
|
class GzipFile:
|
||||||
|
"""The GzipFile class simulates most of the methods of a file object with
|
||||||
|
the exception of the readinto(), truncate(), and xreadlines() methods.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
myfileobj = None
|
myfileobj = None
|
||||||
|
|
||||||
def __init__(self, filename=None, mode=None,
|
def __init__(self, filename=None, mode=None,
|
||||||
compresslevel=9, fileobj=None):
|
compresslevel=9, fileobj=None):
|
||||||
|
"""Constructor for the GzipFile class.
|
||||||
|
|
||||||
|
At least one of fileobj and filename must be given a
|
||||||
|
non-trivial value.
|
||||||
|
|
||||||
|
The new class instance is based on fileobj, which can be a regular
|
||||||
|
file, a StringIO object, or any other object which simulates a file.
|
||||||
|
It defaults to None, in which case filename is opened to provide
|
||||||
|
a file object.
|
||||||
|
|
||||||
|
When fileobj is not None, the filename argument is only used to be
|
||||||
|
included in the gzip file header, which may include the original
|
||||||
|
filename of the uncompressed file. It defaults to the filename of
|
||||||
|
fileobj, if discernible; otherwise, it defaults to the empty string,
|
||||||
|
and in this case the original filename is not included in the header.
|
||||||
|
|
||||||
|
The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
|
||||||
|
depending on whether the file will be read or written. The default
|
||||||
|
is the mode of fileobj if discernible; otherwise, the default is 'rb'.
|
||||||
|
Be aware that only the 'rb', 'ab', and 'wb' values should be used
|
||||||
|
for cross-platform portability.
|
||||||
|
|
||||||
|
The compresslevel argument is an integer from 1 to 9 controlling the
|
||||||
|
level of compression; 1 is fastest and produces the least compression,
|
||||||
|
and 9 is slowest and produces the most compression. The default is 9.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
# guarantee the file is opened in binary mode on platforms
|
# guarantee the file is opened in binary mode on platforms
|
||||||
# that care about that sort of thing
|
# that care about that sort of thing
|
||||||
if mode and 'b' not in mode:
|
if mode and 'b' not in mode:
|
||||||
|
|
|
@ -11,10 +11,23 @@ from formatter import AS_IS
|
||||||
__all__ = ["HTMLParser"]
|
__all__ = ["HTMLParser"]
|
||||||
|
|
||||||
class HTMLParser(SGMLParser):
|
class HTMLParser(SGMLParser):
|
||||||
|
"""This is the basic HTML parser class.
|
||||||
|
|
||||||
|
It supports all entity names required by the HTML 2.0 specification
|
||||||
|
RFC 1866. It also defines handlers for all HTML 2.0 and many HTML 3.0
|
||||||
|
and 3.2 elements.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from htmlentitydefs import entitydefs
|
from htmlentitydefs import entitydefs
|
||||||
|
|
||||||
def __init__(self, formatter, verbose=0):
|
def __init__(self, formatter, verbose=0):
|
||||||
|
"""Creates an instance of the HTMLParser class.
|
||||||
|
|
||||||
|
The formatter parameter is the formatter instance associated with
|
||||||
|
the parser.
|
||||||
|
|
||||||
|
"""
|
||||||
SGMLParser.__init__(self, verbose)
|
SGMLParser.__init__(self, verbose)
|
||||||
self.formatter = formatter
|
self.formatter = formatter
|
||||||
self.savedata = None
|
self.savedata = None
|
||||||
|
@ -43,9 +56,24 @@ class HTMLParser(SGMLParser):
|
||||||
# --- Hooks to save data; shouldn't need to be overridden
|
# --- Hooks to save data; shouldn't need to be overridden
|
||||||
|
|
||||||
def save_bgn(self):
|
def save_bgn(self):
|
||||||
|
"""Begins saving character data in a buffer instead of sending it
|
||||||
|
to the formatter object.
|
||||||
|
|
||||||
|
Retrieve the stored data via the save_end() method. Use of the
|
||||||
|
save_bgn() / save_end() pair may not be nested.
|
||||||
|
|
||||||
|
"""
|
||||||
self.savedata = ''
|
self.savedata = ''
|
||||||
|
|
||||||
def save_end(self):
|
def save_end(self):
|
||||||
|
"""Ends buffering character data and returns all data saved since
|
||||||
|
the preceding call to the save_bgn() method.
|
||||||
|
|
||||||
|
If the nofill flag is false, whitespace is collapsed to single
|
||||||
|
spaces. A call to this method without a preceding call to the
|
||||||
|
save_bgn() method will raise a TypeError exception.
|
||||||
|
|
||||||
|
"""
|
||||||
data = self.savedata
|
data = self.savedata
|
||||||
self.savedata = None
|
self.savedata = None
|
||||||
if not self.nofill:
|
if not self.nofill:
|
||||||
|
@ -55,11 +83,26 @@ class HTMLParser(SGMLParser):
|
||||||
# --- Hooks for anchors; should probably be overridden
|
# --- Hooks for anchors; should probably be overridden
|
||||||
|
|
||||||
def anchor_bgn(self, href, name, type):
|
def anchor_bgn(self, href, name, type):
|
||||||
|
"""This method is called at the start of an anchor region.
|
||||||
|
|
||||||
|
The arguments correspond to the attributes of the <A> tag with
|
||||||
|
the same names. The default implementation maintains a list of
|
||||||
|
hyperlinks (defined by the HREF attribute for <A> tags) within
|
||||||
|
the document. The list of hyperlinks is available as the data
|
||||||
|
attribute anchorlist.
|
||||||
|
|
||||||
|
"""
|
||||||
self.anchor = href
|
self.anchor = href
|
||||||
if self.anchor:
|
if self.anchor:
|
||||||
self.anchorlist.append(href)
|
self.anchorlist.append(href)
|
||||||
|
|
||||||
def anchor_end(self):
|
def anchor_end(self):
|
||||||
|
"""This method is called at the end of an anchor region.
|
||||||
|
|
||||||
|
The default implementation adds a textual footnote marker using an
|
||||||
|
index into the list of hyperlinks created by the anchor_bgn() method.
|
||||||
|
|
||||||
|
"""
|
||||||
if self.anchor:
|
if self.anchor:
|
||||||
self.handle_data("[%d]" % len(self.anchorlist))
|
self.handle_data("[%d]" % len(self.anchorlist))
|
||||||
self.anchor = None
|
self.anchor = None
|
||||||
|
@ -67,6 +110,12 @@ class HTMLParser(SGMLParser):
|
||||||
# --- Hook for images; should probably be overridden
|
# --- Hook for images; should probably be overridden
|
||||||
|
|
||||||
def handle_image(self, src, alt, *args):
|
def handle_image(self, src, alt, *args):
|
||||||
|
"""This method is called to handle images.
|
||||||
|
|
||||||
|
The default implementation simply passes the alt value to the
|
||||||
|
handle_data() method.
|
||||||
|
|
||||||
|
"""
|
||||||
self.handle_data(alt)
|
self.handle_data(alt)
|
||||||
|
|
||||||
# --------- Top level elements
|
# --------- Top level elements
|
||||||
|
|
|
@ -41,9 +41,31 @@ compatible_formats = ["1.0", "1.1", "1.2"] # Old format versions we can read
|
||||||
mdumps = marshal.dumps
|
mdumps = marshal.dumps
|
||||||
mloads = marshal.loads
|
mloads = marshal.loads
|
||||||
|
|
||||||
class PickleError(Exception): pass
|
class PickleError(Exception):
|
||||||
class PicklingError(PickleError): pass
|
"""A common base class for the other pickling exceptions.
|
||||||
class UnpicklingError(PickleError): pass
|
|
||||||
|
Inherits from Exception.
|
||||||
|
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
class PicklingError(PickleError):
|
||||||
|
"""This exception is raised when an unpicklable object is passed to the
|
||||||
|
dump() method.
|
||||||
|
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
class UnpicklingError(PickleError):
|
||||||
|
"""This exception is raised when there is a problem unpickling an object,
|
||||||
|
such as a security violation.
|
||||||
|
|
||||||
|
Note that other exceptions may also be raised during unpickling, including
|
||||||
|
(but not necessarily limited to) AttributeError, EOFError, ImportError,
|
||||||
|
and IndexError.
|
||||||
|
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
class _Stop(Exception):
|
class _Stop(Exception):
|
||||||
def __init__(self, value):
|
def __init__(self, value):
|
||||||
|
@ -111,14 +133,39 @@ del x
|
||||||
class Pickler:
|
class Pickler:
|
||||||
|
|
||||||
def __init__(self, file, bin = 0):
|
def __init__(self, file, bin = 0):
|
||||||
|
"""This takes a file-like object for writing a pickle data stream.
|
||||||
|
|
||||||
|
The optional bin parameter, if true, tells the pickler to use the more
|
||||||
|
efficient binary pickle format, otherwise the ASCII format is used
|
||||||
|
(this is the default).
|
||||||
|
|
||||||
|
The file parameter must have a write() method that accepts a single
|
||||||
|
string argument. It can thus be an open file object, a StringIO
|
||||||
|
object, or any other custom object that meets this interface.
|
||||||
|
|
||||||
|
"""
|
||||||
self.write = file.write
|
self.write = file.write
|
||||||
self.memo = {}
|
self.memo = {}
|
||||||
self.bin = bin
|
self.bin = bin
|
||||||
|
|
||||||
def clear_memo(self):
|
def clear_memo(self):
|
||||||
|
"""Clears the pickler's "memo".
|
||||||
|
|
||||||
|
The memo is the data structure that remembers which objects the
|
||||||
|
pickler has already seen, so that shared or recursive objects are pickled
|
||||||
|
by reference and not by value. This method is useful when re-using
|
||||||
|
picklers.
|
||||||
|
|
||||||
|
"""
|
||||||
self.memo.clear()
|
self.memo.clear()
|
||||||
|
|
||||||
def dump(self, object):
|
def dump(self, object):
|
||||||
|
"""Write a pickled representation of object to the open file object.
|
||||||
|
|
||||||
|
Either the binary or ASCII format will be used, depending on the
|
||||||
|
value of the bin flag passed to the constructor.
|
||||||
|
|
||||||
|
"""
|
||||||
self.save(object)
|
self.save(object)
|
||||||
self.write(STOP)
|
self.write(STOP)
|
||||||
|
|
||||||
|
@ -594,11 +641,30 @@ def whichmodule(cls, clsname):
|
||||||
class Unpickler:
|
class Unpickler:
|
||||||
|
|
||||||
def __init__(self, file):
|
def __init__(self, file):
|
||||||
|
"""This takes a file-like object for reading a pickle data stream.
|
||||||
|
|
||||||
|
This class automatically determines whether the data stream was
|
||||||
|
written in binary mode or not, so it does not need a flag as in
|
||||||
|
the Pickler class factory.
|
||||||
|
|
||||||
|
The file-like object must have two methods, a read() method that
|
||||||
|
takes an integer argument, and a readline() method that requires no
|
||||||
|
arguments. Both methods should return a string. Thus the file-like
|
||||||
|
object can be a file object opened for reading, a StringIO object,
|
||||||
|
or any other custom object that meets this interface.
|
||||||
|
|
||||||
|
"""
|
||||||
self.readline = file.readline
|
self.readline = file.readline
|
||||||
self.read = file.read
|
self.read = file.read
|
||||||
self.memo = {}
|
self.memo = {}
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
|
"""Read a pickled object representation from the open file object.
|
||||||
|
|
||||||
|
Return the reconstituted object hierarchy specified in the file
|
||||||
|
object.
|
||||||
|
|
||||||
|
"""
|
||||||
self.mark = object() # any new unique object
|
self.mark = object() # any new unique object
|
||||||
self.stack = []
|
self.stack = []
|
||||||
self.append = self.stack.append
|
self.append = self.stack.append
|
||||||
|
|
161
Lib/rexec.py
161
Lib/rexec.py
|
@ -114,8 +114,18 @@ RModuleImporter = ihooks.ModuleImporter
|
||||||
|
|
||||||
|
|
||||||
class RExec(ihooks._Verbose):
|
class RExec(ihooks._Verbose):
|
||||||
|
"""Basic restricted execution framework.
|
||||||
|
|
||||||
"""Restricted Execution environment."""
|
Code executed in this restricted environment will only have access to
|
||||||
|
modules and functions that are deemed safe; you can subclass RExec to
|
||||||
|
add or remove capabilities as desired.
|
||||||
|
|
||||||
|
The RExec class can prevent code from performing unsafe operations like
|
||||||
|
reading or writing disk files, or using TCP/IP sockets. However, it does
|
||||||
|
not protect against code using extremely large amounts of memory or
|
||||||
|
processor time.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
ok_path = tuple(sys.path) # That's a policy decision
|
ok_path = tuple(sys.path) # That's a policy decision
|
||||||
|
|
||||||
|
@ -135,6 +145,33 @@ class RExec(ihooks._Verbose):
|
||||||
nok_builtin_names = ('open', 'file', 'reload', '__import__')
|
nok_builtin_names = ('open', 'file', 'reload', '__import__')
|
||||||
|
|
||||||
def __init__(self, hooks = None, verbose = 0):
|
def __init__(self, hooks = None, verbose = 0):
|
||||||
|
"""Returns an instance of the RExec class.
|
||||||
|
|
||||||
|
The hooks parameter is an instance of the RHooks class or a subclass
|
||||||
|
of it. If it is omitted or None, the default RHooks class is
|
||||||
|
instantiated.
|
||||||
|
|
||||||
|
Whenever the RExec module searches for a module (even a built-in one)
|
||||||
|
or reads a module's code, it doesn't actually go out to the file
|
||||||
|
system itself. Rather, it calls methods of an RHooks instance that
|
||||||
|
was passed to or created by its constructor. (Actually, the RExec
|
||||||
|
object doesn't make these calls --- they are made by a module loader
|
||||||
|
object that's part of the RExec object. This allows another level of
|
||||||
|
flexibility, which can be useful when changing the mechanics of
|
||||||
|
import within the restricted environment.)
|
||||||
|
|
||||||
|
By providing an alternate RHooks object, we can control the file
|
||||||
|
system accesses made to import a module, without changing the
|
||||||
|
actual algorithm that controls the order in which those accesses are
|
||||||
|
made. For instance, we could substitute an RHooks object that
|
||||||
|
passes all filesystem requests to a file server elsewhere, via some
|
||||||
|
RPC mechanism such as ILU. Grail's applet loader uses this to support
|
||||||
|
importing applets from a URL for a directory.
|
||||||
|
|
||||||
|
If the verbose parameter is true, additional debugging output may be
|
||||||
|
sent to standard output.
|
||||||
|
|
||||||
|
"""
|
||||||
ihooks._Verbose.__init__(self, verbose)
|
ihooks._Verbose.__init__(self, verbose)
|
||||||
# XXX There's a circular reference here:
|
# XXX There's a circular reference here:
|
||||||
self.hooks = hooks or RHooks(verbose)
|
self.hooks = hooks or RHooks(verbose)
|
||||||
|
@ -250,24 +287,67 @@ class RExec(ihooks._Verbose):
|
||||||
# The r* methods are public interfaces
|
# The r* methods are public interfaces
|
||||||
|
|
||||||
def r_exec(self, code):
|
def r_exec(self, code):
|
||||||
|
"""Execute code within a restricted environment.
|
||||||
|
|
||||||
|
The code parameter must either be a string containing one or more
|
||||||
|
lines of Python code, or a compiled code object, which will be
|
||||||
|
executed in the restricted environment's __main__ module.
|
||||||
|
|
||||||
|
"""
|
||||||
m = self.add_module('__main__')
|
m = self.add_module('__main__')
|
||||||
exec code in m.__dict__
|
exec code in m.__dict__
|
||||||
|
|
||||||
def r_eval(self, code):
|
def r_eval(self, code):
|
||||||
|
"""Evaluate code within a restricted environment.
|
||||||
|
|
||||||
|
The code parameter must either be a string containing a Python
|
||||||
|
expression, or a compiled code object, which will be evaluated in
|
||||||
|
the restricted environment's __main__ module. The value of the
|
||||||
|
expression or code object will be returned.
|
||||||
|
|
||||||
|
"""
|
||||||
m = self.add_module('__main__')
|
m = self.add_module('__main__')
|
||||||
return eval(code, m.__dict__)
|
return eval(code, m.__dict__)
|
||||||
|
|
||||||
def r_execfile(self, file):
|
def r_execfile(self, file):
|
||||||
|
"""Execute the Python code in the file in the restricted
|
||||||
|
environment's __main__ module.
|
||||||
|
|
||||||
|
"""
|
||||||
m = self.add_module('__main__')
|
m = self.add_module('__main__')
|
||||||
execfile(file, m.__dict__)
|
execfile(file, m.__dict__)
|
||||||
|
|
||||||
def r_import(self, mname, globals={}, locals={}, fromlist=[]):
    """Import the module named mname on behalf of restricted code.

    An ImportError is raised when the module is considered unsafe.  The
    work is delegated to self.importer.import_module(), which receives
    all four arguments unchanged; its result is returned.

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the import policy
    enforced by the restricted environment.

    """
    importer = self.importer
    return importer.import_module(mname, globals, locals, fromlist)
|
||||||
|
|
||||||
def r_reload(self, m):
    """Reload module object m, re-parsing and re-initializing it.

    The work is delegated to self.importer.reload(); its result is
    returned.

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    importer = self.importer
    return importer.reload(m)
|
||||||
|
|
||||||
def r_unload(self, m):
    """Unload module m from the restricted environment.

    The module is removed from the restricted environment's sys.modules
    dictionary.  The work is delegated to self.importer.unload(); its
    result is returned.

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    importer = self.importer
    return importer.unload(m)
|
||||||
|
|
||||||
# The s_* methods are similar but also swap std{in,out,err}
|
# The s_* methods are similar but also swap std{in,out,err}
|
||||||
|
@ -325,26 +405,105 @@ class RExec(ihooks._Verbose):
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def s_exec(self, *args):
    """Run code in the restricted environment with restricted std streams.

    Behaves like r_exec(), except that the code is given access to
    restricted versions of the standard I/O streams sys.stdin,
    sys.stderr and sys.stdout.

    The positional arguments are forwarded to r_exec() through
    s_apply(): the code must be a string containing one or more lines
    of Python code, or a compiled code object, and is executed in the
    restricted environment's __main__ module.

    """
    run_with_streams = self.s_apply
    return run_with_streams(self.r_exec, args)
|
||||||
|
|
||||||
def s_eval(self, *args):
    """Evaluate code within a restricted environment.

    Similar to the r_eval() method, but the code will be granted access
    to restricted versions of the standard I/O streams sys.stdin,
    sys.stderr, and sys.stdout.

    The code parameter must either be a string containing a Python
    expression, or a compiled code object, which will be evaluated in
    the restricted environment's __main__ module.  The value of the
    expression or code object will be returned.

    """
    # The return statement must live outside the docstring; when it was
    # placed before the closing quotes the method silently returned None.
    return self.s_apply(self.r_eval, args)
|
||||||
|
|
||||||
def s_execfile(self, *args):
    """Run the Python code in a file in the restricted environment's
    __main__ module, with restricted std streams.

    Behaves like r_execfile(), except that the code is given access to
    restricted versions of the standard I/O streams sys.stdin,
    sys.stderr and sys.stdout.  The positional arguments are forwarded
    to r_execfile() through s_apply().

    """
    run_with_streams = self.s_apply
    return run_with_streams(self.r_execfile, args)
|
||||||
|
|
||||||
def s_import(self, *args):
    """Import a module with restricted std streams in place.

    An ImportError is raised when the module is considered unsafe.

    Behaves like r_import(), except that it has access to restricted
    versions of the standard I/O streams sys.stdin, sys.stderr and
    sys.stdout.  The positional arguments are forwarded to r_import()
    through s_apply().

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    run_with_streams = self.s_apply
    return run_with_streams(self.r_import, args)
|
||||||
|
|
||||||
def s_reload(self, *args):
    """Reload a module object with restricted std streams in place.

    The module is re-parsed and re-initialized.  Behaves like
    r_reload(), except that it has access to restricted versions of the
    standard I/O streams sys.stdin, sys.stderr and sys.stdout.  The
    positional arguments are forwarded to r_reload() through s_apply().

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    run_with_streams = self.s_apply
    return run_with_streams(self.r_reload, args)
|
||||||
|
|
||||||
def s_unload(self, *args):
    """Unload a module with restricted std streams in place.

    The module is removed from the restricted environment's sys.modules
    dictionary.  Behaves like r_unload(), except that it has access to
    restricted versions of the standard I/O streams sys.stdin,
    sys.stderr and sys.stdout.  The positional arguments are forwarded
    to r_unload() through s_apply().

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    run_with_streams = self.s_apply
    return run_with_streams(self.r_unload, args)
|
||||||
|
|
||||||
# Restricted open(...)
|
# Restricted open(...)
|
||||||
|
|
||||||
def r_open(self, file, mode='r', buf=-1):
    """Handle an open() call made inside the restricted environment.

    The arguments are the same as those of the built-in open(), and a
    file object (or a class instance compatible with file objects)
    should be returned.  RExec's default behaviour is to allow opening
    any file for reading while refusing every attempt to write: any
    mode other than 'r' or 'rb' raises IOError.

    Code running in the restricted environment calls this method
    implicitly.  Subclasses override it to change the policy enforced
    by the restricted environment.

    """
    read_only = mode in ('r', 'rb')
    if read_only:
        return open(file, mode, buf)
    raise IOError("can't open files for writing in restricted mode")
|
||||||
|
|
|
@ -20,6 +20,11 @@ def _debug(msg):
|
||||||
|
|
||||||
|
|
||||||
class RobotFileParser:
|
class RobotFileParser:
|
||||||
|
""" This class provides a set of methods to read, parse and answer
|
||||||
|
questions about a single robots.txt file.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, url=''):
|
def __init__(self, url=''):
|
||||||
self.entries = []
|
self.entries = []
|
||||||
self.default_entry = None
|
self.default_entry = None
|
||||||
|
@ -29,17 +34,29 @@ class RobotFileParser:
|
||||||
self.last_checked = 0
|
self.last_checked = 0
|
||||||
|
|
||||||
def mtime(self):
    """Return the time the robots.txt file was last fetched.

    The value is whatever is currently stored in self.last_checked.
    Long-running web spiders can use it to decide when to check for a
    new robots.txt file.

    """
    last_fetched = self.last_checked
    return last_fetched
|
||||||
|
|
||||||
def modified(self):
    """Record the current time as the moment the robots.txt file was
    last fetched.

    The timestamp from time.time() is stored in self.last_checked.

    """
    import time
    now = time.time()
    self.last_checked = now
|
||||||
|
|
||||||
def set_url(self, url):
    """Set the URL that refers to a robots.txt file.

    The URL is stored in self.url; its network-location and path
    components (items 1 and 2 of the urlparse.urlparse() result) are
    stored in self.host and self.path.

    """
    self.url = url
    components = urlparse.urlparse(url)
    self.host = components[1]
    self.path = components[2]
|
||||||
|
|
||||||
def read(self):
|
def read(self):
|
||||||
|
"""Reads the robots.txt URL and feeds it to the parser."""
|
||||||
opener = URLopener()
|
opener = URLopener()
|
||||||
f = opener.open(self.url)
|
f = opener.open(self.url)
|
||||||
lines = []
|
lines = []
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue