#4661: add bytes parsing and generation to email (email version bump to 5.1.0)

The work on this is not 100% complete, but everything is present to
allow real-world testing of the code.  The only remaining major todo
item is to (hopefully!) enhance the handling of non-ASCII bytes in headers
converted to unicode by RFC2047 encoding them rather than replacing them with
'?'s.
This commit is contained in:
R. David Murray 2010-10-08 15:55:28 +00:00
parent 59fdd6736b
commit 96fd54eaec
11 changed files with 708 additions and 85 deletions

View file

@ -7,7 +7,7 @@
__all__ = ['Parser', 'HeaderParser']
import warnings
from io import StringIO
from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser
from email.message import Message
@ -89,3 +89,47 @@ class HeaderParser(Parser):
def parsestr(self, text, headersonly=True):
return Parser.parsestr(self, text, True)
class BytesParser:
def __init__(self, *args, **kw):
"""Parser of binary RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.
The input must be formatted as a block of RFC 2822 headers and header
continuation lines, optionally preceeded by a `Unix-from' header. The
header block is terminated either by the end of the input or by a
blank line.
_class is the class to instantiate for new message objects when they
must be created. This class must have a constructor that can take
zero arguments. Default is Message.Message.
"""
self.parser = Parser(*args, **kw)
def parse(self, fp, headersonly=False):
"""Create a message structure from the data in a binary file.
Reads all the data from the file and returns the root of the message
structure. Optional headersonly is a flag specifying whether to stop
parsing after reading the headers or not. The default is False,
meaning it parses the entire contents of the file.
"""
fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
return self.parser.parse(fp, headersonly)
def parsebytes(self, text, headersonly=False):
"""Create a message structure from a byte string.
Returns the root of the message structure. Optional headersonly is a
flag specifying whether to stop parsing after reading the headers or
not. The default is False, meaning it parses the entire contents of
the file.
"""
text = text.decode('ASCII', errors='surrogateescape')
return self.parser.parsestr(text, headersonly)