mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
Sync'ing with standalone email package 2.0.1. This adds support for
non-us-ascii character sets in headers and bodies. Some API changes (with DeprecationWarnings for the old APIs). Better RFC-compliant implementations of base64 and quoted-printable. Updated test cases. Documentation updates to follow (after I finish writing them ;).
This commit is contained in:
parent
68e69338ae
commit
409a4c08b5
20 changed files with 2209 additions and 143 deletions
327
Lib/email/Charset.py
Normal file
327
Lib/email/Charset.py
Normal file
|
@ -0,0 +1,327 @@
|
|||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: che@debian.org (Ben Gertzfield)
|
||||
|
||||
from types import UnicodeType
|
||||
from email.Encoders import encode_7or8bit
|
||||
import email.base64MIME
|
||||
import email.quopriMIME
|
||||
|
||||
|
||||
|
||||
# Flags for types of header encodings
|
||||
QP = 1 # Quoted-Printable
|
||||
BASE64 = 2 # Base64
|
||||
|
||||
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
|
||||
MISC_LEN = 7
|
||||
|
||||
DEFAULT_CHARSET = 'us-ascii'
|
||||
|
||||
|
||||
|
||||
# Defaults
|
||||
CHARSETS = {
|
||||
# input header enc body enc output conv
|
||||
'iso-8859-1': (QP, QP, None),
|
||||
'iso-8859-2': (QP, QP, None),
|
||||
'us-ascii': (None, None, None),
|
||||
'big5': (BASE64, BASE64, None),
|
||||
'gb2312': (BASE64, BASE64, None),
|
||||
'euc-jp': (BASE64, None, 'iso-2022-jp'),
|
||||
'shift_jis': (BASE64, None, 'iso-2022-jp'),
|
||||
'iso-2022-jp': (BASE64, None, None),
|
||||
'koi8-r': (BASE64, BASE64, None),
|
||||
'utf-8': (BASE64, BASE64, 'utf-8'),
|
||||
}
|
||||
|
||||
# Aliases for other commonly-used names for character sets. Map
|
||||
# them to the real ones used in email.
|
||||
ALIASES = {
|
||||
'latin_1': 'iso-8859-1',
|
||||
'latin-1': 'iso-8859-1',
|
||||
'ascii': 'us-ascii',
|
||||
}
|
||||
|
||||
# Map charsets to their Unicode codec strings. Note that the Japanese
|
||||
# examples included below do not (yet) come with Python! They are available
|
||||
# from http://pseudo.grad.sccs.chukyo-u.ac.jp/~kajiyama/python/
|
||||
|
||||
# The Chinese and Korean codecs are available from SourceForge:
|
||||
#
|
||||
# http://sourceforge.net/projects/python-codecs/
|
||||
#
|
||||
# although you'll need to check them out of cvs since they haven't been file
|
||||
# released yet. You might also try to use
|
||||
#
|
||||
# http://www.freshports.org/port-description.php3?port=6702
|
||||
#
|
||||
# if you can get logged in. AFAICT, both the Chinese and Korean codecs are
|
||||
# fairly experimental at this point.
|
||||
CODEC_MAP = {
    'euc-jp':      'japanese.euc-jp',
    'iso-2022-jp': 'japanese.iso-2022-jp',
    'shift_jis':   'japanese.shift_jis',
    # Keyed by the canonical charset name 'gb2312' (the same spelling used in
    # CHARSETS) so that a Charset for gb2312 actually finds this codec.
    'gb2312':      'eucgb2312_cn',
    'big5':        'big5_tw',
    'utf-8':       'utf-8',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii':    None,
    }
|
||||
|
||||
|
||||
|
||||
# Convenience functions for extending the above mappings
|
||||
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register the email properties of a character set in the global map.

    charset is the canonical name of the input character set.

    header_enc and body_enc are optional; each is Charset.QP (for
    quoted-printable), Charset.BASE64 (for base64 encoding), or None (for no
    encoding).  They say how message headers and message bodies written in
    the input charset are to be encoded.  Both default to None.

    output_charset is optional and names the character set the output
    should be in.  When Charset.convert() is called, conversion goes from
    the input charset, to Unicode, to the output charset.  By default the
    output stays in the same character set as the input.

    Both charset and output_charset need Unicode codec entries in the
    module's charset-to-codec mapping; call add_codec(charset, codecname)
    to register codecs the module does not know about.  See the codec
    module's documentation for more information.
    """
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
|
||||
|
||||
|
||||
def add_alias(alias, canonical):
    """Register an alternative name for a character set.

    alias is the alternative name, e.g. latin-1
    canonical is the canonical name of the character set, e.g. iso-8859-1
    """
    ALIASES.update({alias: canonical})
|
||||
|
||||
|
||||
def add_codec(charset, codecname):
    """Register the codec that maps a charset's characters to/from Unicode.

    charset is the canonical name of a character set.  codecname is the
    name of a Python codec, in the form accepted as the second argument to
    the unicode() built-in or by the .encode() method of a Unicode string.
    """
    CODEC_MAP.update({charset: codecname})
|
||||
|
||||
|
||||
|
||||
class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g. latin_1
                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable) or Charset.BASE64 (for
                     base64 encoding).  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it will
                    be None.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is necessary,
                  this attribute will have the same value as the input_codec.
    """
    def __init__(self, input_charset=DEFAULT_CHARSET):
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Look up the encodings and conversion in the CHARSETS table.
        # Charsets that are not listed there fall back to base64 for both
        # headers and bodies, with no output conversion.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (BASE64, BASE64, None))
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_charset.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.input_codec)

    def __str__(self):
        return self.input_charset.lower()

    def __eq__(self, other):
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding:
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns the function encode_7or8bit otherwise.
        """
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def convert(self, s):
        """Convert a string from the input_codec to the output_codec."""
        if self.input_codec != self.output_codec:
            return unicode(s, self.input_codec).encode(self.output_codec)
        else:
            return s

    def to_splittable(self, s):
        """Convert a possibly multibyte string to a safely splittable format.

        Uses the input_codec to try and convert the string to Unicode, so it
        can be safely split on character boundaries (even for double-byte
        characters).

        Returns the string untouched if we don't know how to convert it to
        Unicode with the input_charset.

        Characters that could not be converted to Unicode will be replaced
        with the Unicode replacement character U+FFFD.
        """
        if isinstance(s, UnicodeType) or self.input_codec is None:
            return s
        try:
            return unicode(s, self.input_codec, 'replace')
        except LookupError:
            # Input codec not installed on system, so return the original
            # string unchanged.
            return s

    def from_splittable(self, ustr, to_output=1):
        """Convert a splittable string back into an encoded string.

        Uses the proper codec to try and convert the string from
        Unicode back into an encoded format.  Return the string as-is
        if it is not Unicode, or if it could not be encoded from
        Unicode.

        Characters that could not be converted from Unicode will be replaced
        with an appropriate character (usually '?').

        If to_output is true, uses output_codec to convert to an encoded
        format.  If to_output is false, uses input_codec.  to_output defaults
        to 1.
        """
        if to_output:
            codec = self.output_codec
        else:
            codec = self.input_codec
        if not isinstance(ustr, UnicodeType) or codec is None:
            return ustr
        try:
            return ustr.encode(codec, 'replace')
        except LookupError:
            # Output codec not installed
            return ustr

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is set, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def encoded_header_len(self, s):
        """Return the length of the encoded header string."""
        cset = self.get_output_charset()
        # An encoded word costs the encoded payload, the charset name and the
        # fixed "=?", "?x?", "?=" punctuation (MISC_LEN).  Without a header
        # encoding the string is used as-is, so its length is just len(s).
        if self.header_encoding == BASE64:
            return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == QP:
            return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN
        else:
            return len(s)

    def header_encode(self, s, convert=0):
        """Header-encode a string, optionally converting it to output_charset.

        If convert is true, the string will be converted from the input
        charset to the output charset automatically.  This is not useful for
        multibyte character sets, which have line length issues (multibyte
        characters must be split on a character, not a byte boundary); use the
        high-level Header class to deal with these issues.  convert defaults
        to 0.

        The type of encoding (base64 or quoted-printable) will be based on
        self.header_encoding.
        """
        cset = self.get_output_charset()
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.header_encoding == BASE64:
            return email.base64MIME.header_encode(s, cset)
        elif self.header_encoding == QP:
            return email.quopriMIME.header_encode(s, cset)
        else:
            return s

    def body_encode(self, s, convert=1):
        """Body-encode a string and convert it to output_charset.

        If convert is true (the default), the string will be converted from
        the input charset to output charset automatically.  Unlike
        header_encode(), there are no issues with byte boundaries and
        multibyte charsets in email bodies, so this is usually pretty safe.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.
        """
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.body_encoding == BASE64:
            return email.base64MIME.body_encode(s)
        # The body's transfer encoding is governed by body_encoding, not by
        # header_encoding; the two may legitimately differ for a charset.
        elif self.body_encoding == QP:
            return email.quopriMIME.body_encode(s)
        else:
            return s
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Module containing encoding functions for Image.Image and Text.Text.
|
||||
|
@ -11,7 +11,9 @@ from quopri import encodestring as _encodestring
|
|||
|
||||
# Helpers
|
||||
def _qencode(s):
|
||||
return _encodestring(s, quotetabs=1)
|
||||
enc = _encodestring(s, quotetabs=1)
|
||||
# Must encode spaces, which quopri.encodestring() doesn't do
|
||||
return enc.replace(' ', '=20')
|
||||
|
||||
|
||||
def _bencode(s):
|
||||
|
@ -54,6 +56,10 @@ def encode_quopri(msg):
|
|||
def encode_7or8bit(msg):
|
||||
"""Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
|
||||
orig = msg.get_payload()
|
||||
if orig is None:
|
||||
# There's no payload. For backwards compatibility we use 7bit
|
||||
msg['Content-Transfer-Encoding'] = '7bit'
|
||||
return
|
||||
# We play a trick to make this go fast. If encoding to ASCII succeeds, we
|
||||
# know the data must be 7bit, otherwise treat it as 8bit.
|
||||
try:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""email package exception classes.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Classes to generate plain text from a message object tree.
|
||||
|
@ -166,30 +166,33 @@ class Generator:
|
|||
return text
|
||||
rtn = []
|
||||
for line in text.split('\n'):
|
||||
splitline = []
|
||||
# Short lines can remain unchanged
|
||||
if len(line.replace('\t', SPACE8)) <= maxheaderlen:
|
||||
rtn.append(line)
|
||||
SEMINLTAB.join(rtn)
|
||||
splitline.append(line)
|
||||
rtn.append(SEMINLTAB.join(splitline))
|
||||
else:
|
||||
oldlen = len(text)
|
||||
oldlen = len(line)
|
||||
# Try to break the line on semicolons, but if that doesn't
|
||||
# work, try to split on folding whitespace.
|
||||
while len(text) > maxheaderlen:
|
||||
i = text.rfind(';', 0, maxheaderlen)
|
||||
while len(line) > maxheaderlen:
|
||||
i = line.rfind(';', 0, maxheaderlen)
|
||||
if i < 0:
|
||||
break
|
||||
rtn.append(text[:i])
|
||||
text = text[i+1:].lstrip()
|
||||
if len(text) <> oldlen:
|
||||
splitline.append(line[:i])
|
||||
line = line[i+1:].lstrip()
|
||||
if len(line) <> oldlen:
|
||||
# Splitting on semis worked
|
||||
rtn.append(text)
|
||||
return SEMINLTAB.join(rtn)
|
||||
splitline.append(line)
|
||||
rtn.append(SEMINLTAB.join(splitline))
|
||||
continue
|
||||
# Splitting on semis didn't help, so try to split on
|
||||
# whitespace.
|
||||
parts = re.split(r'(\s+)', text)
|
||||
parts = re.split(r'(\s+)', line)
|
||||
# Watch out though for "Header: longnonsplittableline"
|
||||
if parts[0].endswith(':') and len(parts) == 3:
|
||||
return text
|
||||
rtn.append(line)
|
||||
continue
|
||||
first = parts.pop(0)
|
||||
sublines = [first]
|
||||
acc = len(first)
|
||||
|
@ -203,13 +206,14 @@ class Generator:
|
|||
else:
|
||||
# Split it here, but don't forget to ignore the
|
||||
# next whitespace-only part
|
||||
rtn.append(EMPTYSTRING.join(sublines))
|
||||
splitline.append(EMPTYSTRING.join(sublines))
|
||||
del parts[0]
|
||||
first = parts.pop(0)
|
||||
sublines = [first]
|
||||
acc = len(first)
|
||||
rtn.append(EMPTYSTRING.join(sublines))
|
||||
return NLTAB.join(rtn)
|
||||
splitline.append(EMPTYSTRING.join(sublines))
|
||||
rtn.append(NLTAB.join(splitline))
|
||||
return NL.join(rtn)
|
||||
|
||||
#
|
||||
# Handlers for writing types and subtypes
|
||||
|
@ -219,6 +223,9 @@ class Generator:
|
|||
payload = msg.get_payload()
|
||||
if payload is None:
|
||||
return
|
||||
cset = msg.get_charset()
|
||||
if cset is not None:
|
||||
payload = cset.body_encode(payload)
|
||||
if not isinstance(payload, StringType):
|
||||
raise TypeError, 'string payload expected: %s' % type(payload)
|
||||
if self._mangle_from_:
|
||||
|
@ -233,7 +240,18 @@ class Generator:
|
|||
# together, and then make sure that the boundary we've chosen isn't
|
||||
# present in the payload.
|
||||
msgtexts = []
|
||||
for part in msg.get_payload():
|
||||
subparts = msg.get_payload()
|
||||
if subparts is None:
|
||||
# Nothing has ever been attached
|
||||
boundary = msg.get_boundary(failobj=_make_boundary())
|
||||
print >> self._fp, '--' + boundary
|
||||
print >> self._fp, '\n'
|
||||
print >> self._fp, '--' + boundary + '--'
|
||||
return
|
||||
elif not isinstance(subparts, ListType):
|
||||
# Scalar payload
|
||||
subparts = [subparts]
|
||||
for part in subparts:
|
||||
s = StringIO()
|
||||
g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
|
||||
g(part, unixfrom=0)
|
||||
|
@ -365,7 +383,7 @@ class DecodedGenerator(Generator):
|
|||
|
||||
|
||||
# Helper
|
||||
def _make_boundary(self, text=None):
|
||||
def _make_boundary(text=None):
|
||||
# Craft a random boundary. If text is given, ensure that the chosen
|
||||
# boundary doesn't appear in the text.
|
||||
boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
|
||||
|
|
210
Lib/email/Header.py
Normal file
210
Lib/email/Header.py
Normal file
|
@ -0,0 +1,210 @@
|
|||
# Copyright (C) 2002 Python Software Foundation
|
||||
# Author: che@debian.org (Ben Gertzfield)
|
||||
|
||||
"""Header encoding and decoding functionality."""
|
||||
|
||||
import re
|
||||
import email.quopriMIME
|
||||
import email.base64MIME
|
||||
from email.Charset import Charset
|
||||
|
||||
CRLFSPACE = '\r\n '
|
||||
CRLF = '\r\n'
|
||||
NLSPACE = '\n '
|
||||
|
||||
MAXLINELEN = 76
|
||||
|
||||
ENCODE = 1
|
||||
DECODE = 2
|
||||
|
||||
# Match encoded-word strings in the form =?charset?q?Hello_World?=
|
||||
ecre = re.compile(r'''
|
||||
=\? # literal =?
|
||||
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
|
||||
\? # literal ?
|
||||
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
|
||||
\? # literal ?
|
||||
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
|
||||
\?= # literal ?=
|
||||
''', re.VERBOSE | re.IGNORECASE)
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
_max_append = email.quopriMIME._max_append
|
||||
|
||||
|
||||
|
||||
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]

    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue

        # ecre has three groups, so split() yields a flat list of the form
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?  If the previous chunk was
                # also unencoded text, extend it with this text (joined by a
                # single space, since unenc has been stripped).
                if decoded and decoded[-1][1] is None:
                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                if encoding == 'q':
                    dec = email.quopriMIME.header_decode(encoded)
                elif encoding == 'b':
                    dec = email.base64MIME.decode(encoded)
                else:
                    dec = encoded

                # Adjacent encoded words in the same charset are merged into
                # a single (string, charset) pair.
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            del parts[0:3]
    return decoded
|
||||
|
||||
|
||||
|
||||
class Header:
    def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
                 header_name=None):
        """Create a MIME-compliant header that can contain many languages.

        Specify the initial header value in s.  Specify its character set as a
        Charset object in the charset argument.  If None, a default Charset
        instance will be used.

        You can later append to the header with append(s, charset) below;
        charset does not have to be the same as the one initially specified
        here.  In fact, it's optional, and if not given, defaults to the
        charset specified in the constructor.

        The maximum line length can either be specified by maxlinelen, or you
        can pass in the name of the header field (e.g. "Subject") to let this
        class guess the best line length to use to prevent wrapping.  The
        default maxlinelen is 76.
        """
        if charset is None:
            charset = Charset()
        self._charset = charset
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        self.append(s, charset)
        self._maxlinelen = maxlinelen
        if header_name is not None:
            self.guess_maxlinelen(header_name)

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def guess_maxlinelen(self, s=None):
        """Guess the maximum length to make each header line.

        Given a header name (e.g. "Subject"), set this header's maximum line
        length to an appropriate length to avoid line wrapping.  If s is not
        given, return the previous maximum line length and don't set it.

        Returns the new maximum line length.
        """
        # BAW: is this semantic necessary?
        if s is not None:
            # Leave room for the header name plus two more characters.
            self._maxlinelen = MAXLINELEN - len(s) - 2
        return self._maxlinelen

    def append(self, s, charset=None):
        """Append string s with Charset charset to the MIME header.

        charset defaults to the one given in the class constructor.
        """
        if charset is None:
            charset = self._charset
        self._chunks.append((s, charset))

    def _split(self, s, charset):
        # Split up a header safely for use with encode_chunks.  BAW: this
        # appears to be a private convenience method.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable)

        if charset.encoded_header_len(encoded) < self._maxlinelen:
            return [(encoded, charset)]
        # A chunk of fewer than two characters cannot be halved any further;
        # emit it as-is rather than recursing forever when even a single
        # character does not fit within the line length limit.
        if len(splittable) < 2:
            return [(encoded, charset)]
        # Divide and conquer.  The halves are converted back with the input
        # codec (to_output=0) because the recursive call decodes them again.
        halfway = len(splittable) // 2
        first = charset.from_splittable(splittable[:halfway], 0)
        last = charset.from_splittable(splittable[halfway:], 0)
        return self._split(first, charset) + self._split(last, charset)

    def encode(self):
        """Encode a message header, possibly converting charset and encoding.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.
        """
        newchunks = []
        for s, charset in self._chunks:
            newchunks += self._split(s, charset)
        # Do not store the split chunks back on the instance: they are already
        # in the output charset, so splitting them again on a later call
        # would decode them with the wrong (input) codec.
        return self._encode_chunks(newchunks)

    def encode_chunks(self):
        """MIME-encode a header with many different charsets and/or encodings.

        Given a list of pairs (string, charset), return a MIME-encoded string
        suitable for use in a header field.  Each pair may have different
        charsets and/or encodings, and the resulting header will accurately
        reflect each setting.

        Each encoding can be email.Charset.QP (quoted-printable, for
        ASCII-like character sets like iso-8859-1), email.Charset.BASE64
        (Base64, for non-ASCII like character sets like KOI8-R and
        iso-2022-jp), or None (no encoding).

        Each pair will be represented on a separate line; the resulting string
        will be in the format:

        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\\n
          =?charset2?b?SvxyZ2VuIEL2aW5n?="
        """
        return self._encode_chunks(self._chunks)

    def _encode_chunks(self, pairs):
        # Header-encode each (string, charset) pair and join the resulting
        # lines with a newline followed by a space.
        chunks = []
        for header, charset in pairs:
            if charset is None:
                _max_append(chunks, header, self._maxlinelen, ' ')
            else:
                _max_append(chunks, charset.header_encode(header, 0),
                            self._maxlinelen, ' ')
        return NLSPACE.join(chunks)
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Various types of useful iterators and generators.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Base class for MIME specializations.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Class representing image/* type MIME documents.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Class representing message/* MIME documents.
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Class representing text/* type MIME documents.
|
||||
"""
|
||||
|
||||
import warnings
|
||||
import MIMEBase
|
||||
from Encoders import encode_7or8bit
|
||||
|
||||
|
@ -13,7 +14,7 @@ class MIMEText(MIMEBase.MIMEBase):
|
|||
"""Class for generating text/* type MIME documents."""
|
||||
|
||||
def __init__(self, _text, _subtype='plain', _charset='us-ascii',
|
||||
_encoder=encode_7or8bit):
|
||||
_encoder=None):
|
||||
"""Create a text/* type MIME document.
|
||||
|
||||
_text is the string for this message object. If the text does not end
|
||||
|
@ -22,20 +23,26 @@ class MIMEText(MIMEBase.MIMEBase):
|
|||
_subtype is the MIME sub content type, defaulting to "plain".
|
||||
|
||||
_charset is the character set parameter added to the Content-Type:
|
||||
header. This defaults to "us-ascii".
|
||||
header. This defaults to "us-ascii". Note that as a side-effect, the
|
||||
Content-Transfer-Encoding: header will also be set.
|
||||
|
||||
_encoder is a function which will perform the actual encoding for
|
||||
transport of the text data. It takes one argument, which is this
|
||||
Text instance. It should use get_payload() and set_payload() to
|
||||
change the payload to the encoded form. It should also add any
|
||||
Content-Transfer-Encoding: or other headers to the message as
|
||||
necessary. The default encoding doesn't actually modify the payload,
|
||||
but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
|
||||
as appropriate.
|
||||
The use of the _encoder is deprecated. The encoding of the payload,
|
||||
and the setting of the character set parameter now happens implicitly
|
||||
based on the _charset argument. If _encoder is supplied, then a
|
||||
DeprecationWarning is used, and the _encoder functionality may
|
||||
override any header settings indicated by _charset. This is probably
|
||||
not what you want.
|
||||
"""
|
||||
MIMEBase.MIMEBase.__init__(self, 'text', _subtype,
|
||||
**{'charset': _charset})
|
||||
if _text and _text[-1] <> '\n':
|
||||
_text += '\n'
|
||||
self.set_payload(_text)
|
||||
_encoder(self)
|
||||
self.set_payload(_text, _charset)
|
||||
if _encoder is not None:
|
||||
warnings.warn('_encoder argument is obsolete.',
|
||||
DeprecationWarning, 2)
|
||||
# Because set_payload() with a _charset will set its own
|
||||
# Content-Transfer-Encoding: header, we need to delete the
|
||||
# existing one or will end up with two of them. :(
|
||||
del self['content-transfer-encoding']
|
||||
_encoder(self)
|
||||
|
|
|
@ -1,23 +1,47 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Basic message object for the email package object model.
|
||||
"""
|
||||
|
||||
from __future__ import generators
|
||||
|
||||
import re
|
||||
import base64
|
||||
import quopri
|
||||
import warnings
|
||||
from cStringIO import StringIO
|
||||
from types import ListType
|
||||
from types import ListType, StringType
|
||||
|
||||
# Intrapackage imports
|
||||
import Errors
|
||||
import Utils
|
||||
import Charset
|
||||
|
||||
SEMISPACE = '; '
|
||||
|
||||
# Regular expression used to split header parameters. BAW: this may be too
|
||||
# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
|
||||
# most headers found in the wild. We may eventually need a full fledged
|
||||
# parser.
|
||||
paramre = re.compile(r'\s*;\s*')
|
||||
# Regular expression that matches `special' characters in parameters, the
|
||||
# existence of which forces quoting of the parameter value.
|
||||
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|
||||
|
||||
|
||||
|
||||
# Helper function
|
||||
def _formatparam(param, value=None, quote=1):
|
||||
"""Convenience function to format and return a key=value pair.
|
||||
|
||||
Will quote the value if needed or if quote is true.
|
||||
"""
|
||||
if value is not None and len(value) > 0:
|
||||
# BAW: Please check this. I think that if quote is set it should
|
||||
# force quoting even if not necessary.
|
||||
if quote or tspecials.search(value):
|
||||
return '%s="%s"' % (param, Utils.quote(value))
|
||||
else:
|
||||
return '%s=%s' % (param, value)
|
||||
else:
|
||||
return param
|
||||
|
||||
|
||||
|
||||
|
@ -39,6 +63,7 @@ class Message:
|
|||
self._headers = []
|
||||
self._unixfrom = None
|
||||
self._payload = None
|
||||
self._charset = None
|
||||
# Defaults for multipart messages
|
||||
self.preamble = self.epilogue = None
|
||||
|
||||
|
@ -83,6 +108,8 @@ class Message:
|
|||
If the current payload is empty, then the current payload will be made
|
||||
a scalar, set to the given value.
|
||||
"""
|
||||
warnings.warn('add_payload() is deprecated, use attach() instead.',
|
||||
DeprecationWarning, 2)
|
||||
if self._payload is None:
|
||||
self._payload = payload
|
||||
elif type(self._payload) is ListType:
|
||||
|
@ -93,8 +120,18 @@ class Message:
|
|||
else:
|
||||
self._payload = [self._payload, payload]
|
||||
|
||||
# A useful synonym
|
||||
attach = add_payload
|
||||
def attach(self, payload):
    """Append payload to this message's list of subparts.

    An unset (None) payload becomes a one-element list holding payload;
    otherwise payload is appended to the existing payload, which therefore
    has to be a list already.  To store a scalar payload (e.g. when
    attaching a message/rfc822 subpart) call set_payload() instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
        return
    self._payload = [payload]
|
||||
|
||||
def get_payload(self, i=None, decode=0):
|
||||
"""Return the current payload exactly as is.
|
||||
|
@ -128,10 +165,58 @@ class Message:
|
|||
return payload
|
||||
|
||||
|
||||
def set_payload(self, payload):
|
||||
"""Set the payload to the given value."""
|
||||
self._payload = payload
|
||||
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    When charset is not None it is handed to set_charset() after the
    payload has been stored; see that method for the accepted values.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
|
||||
|
||||
def set_charset(self, charset):
    """Associate a character set with the payload.

    charset may be None, a string, or a Charset instance.  None removes the
    charset parameter from the Content-Type: header and clears the stored
    charset.  A string is converted by calling the Charset constructor.
    Anything else raises TypeError.

    For a real charset, a MIME-Version: header and a Content-Type: header
    are added when absent; an existing Content-Type: has its charset
    parameter set to the charset's output charset.  A
    Content-Transfer-Encoding: header is only added when the message does
    not already carry one.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, StringType):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    # An existing Content-Transfer-Encoding: header is left untouched.
    if self.has_key('Content-Transfer-Encoding'):
        return
    cte = charset.get_body_encoding()
    if callable(cte):
        # A callable body encoding is invoked with the message itself.
        cte(self)
    else:
        self.add_header('Content-Transfer-Encoding', cte)
|
||||
|
||||
def get_charset(self):
    """Return the charset stored for this message's payload.

    This is the Charset object installed by set_charset(), or None when no
    charset is set.
    """
    return self._charset
|
||||
|
||||
#
|
||||
# MAPPING INTERFACE (partial)
|
||||
#
|
||||
|
@ -257,7 +342,7 @@ class Message:
|
|||
if v is None:
|
||||
parts.append(k.replace('_', '-'))
|
||||
else:
|
||||
parts.append('%s="%s"' % (k.replace('_', '-'), v))
|
||||
parts.append(_formatparam(k.replace('_', '-'), v))
|
||||
if _value is not None:
|
||||
parts.insert(0, _value)
|
||||
self._headers.append((_name, SEMISPACE.join(parts)))
|
||||
|
@ -308,6 +393,8 @@ class Message:
|
|||
for p in paramre.split(value):
|
||||
try:
|
||||
name, val = p.split('=', 1)
|
||||
name = name.rstrip()
|
||||
val = val.lstrip()
|
||||
except ValueError:
|
||||
# Must have been a bare attribute
|
||||
name = p
|
||||
|
@ -315,26 +402,29 @@ class Message:
|
|||
params.append((name, val))
|
||||
return params
|
||||
|
||||
def get_params(self, failobj=None, header='content-type'):
|
||||
def get_params(self, failobj=None, header='content-type', unquote=1):
|
||||
"""Return the message's Content-Type: parameters, as a list.
|
||||
|
||||
The elements of the returned list are 2-tuples of key/value pairs, as
|
||||
split on the `=' sign. The left hand side of the `=' is the key,
|
||||
while the right hand side is the value. If there is no `=' sign in
|
||||
the parameter the value is the empty string. The value is always
|
||||
unquoted.
|
||||
unquoted, unless unquote is set to a false value.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type:
|
||||
header. Optional header is the header to search instead of
|
||||
Content-Type:
|
||||
Content-Type:.
|
||||
"""
|
||||
missing = []
|
||||
params = self._get_params_preserve(missing, header)
|
||||
if params is missing:
|
||||
return failobj
|
||||
return [(k, Utils.unquote(v)) for k, v in params]
|
||||
if unquote:
|
||||
return [(k, Utils.unquote(v)) for k, v in params]
|
||||
else:
|
||||
return params
|
||||
|
||||
def get_param(self, param, failobj=None, header='content-type'):
|
||||
def get_param(self, param, failobj=None, header='content-type', unquote=1):
|
||||
"""Return the parameter value if found in the Content-Type: header.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type:
|
||||
|
@ -342,15 +432,112 @@ class Message:
|
|||
Content-Type:
|
||||
|
||||
Parameter keys are always compared case insensitively. Values are
|
||||
always unquoted.
|
||||
always unquoted, unless unquote is set to a false value.
|
||||
"""
|
||||
if not self.has_key(header):
|
||||
return failobj
|
||||
for k, v in self._get_params_preserve(failobj, header):
|
||||
if k.lower() == param.lower():
|
||||
return Utils.unquote(v)
|
||||
if unquote:
|
||||
return Utils.unquote(v)
|
||||
else:
|
||||
return v
|
||||
return failobj
|
||||
|
||||
def set_param(self, param, value, header='Content-Type', requote=1):
    """Set a parameter in the Content-Type: header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  This includes a parameter that is present
    with an empty value (or as a bare attribute).

    If header is Content-Type: and has not yet been defined in this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended, as per RFC 2045.

    An alternate header can be specified in the header argument, and
    all parameters will be quoted as appropriate unless requote is
    set to a false value.
    """
    if not self.has_key(header) and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)
    # Use a private sentinel as failobj so that "parameter absent" can be
    # told apart from "parameter present but empty"; testing the returned
    # value for truth would append a duplicate in the latter case.
    missing = []
    if self.get_param(param, failobj=missing, header=header) is missing:
        # New parameter: append it to whatever the header holds so far.
        if not ctype:
            ctype = _formatparam(param, value, requote)
        else:
            ctype = SEMISPACE.join(
                [ctype, _formatparam(param, value, requote)])
    else:
        # Existing parameter: rebuild the header value, substituting the
        # new value where the (case-insensitive) name matches.
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            if not ctype:
                ctype = append_param
            else:
                ctype = SEMISPACE.join([ctype, append_param])
    # Only rewrite the header when its value actually changed.
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
|
||||
|
||||
def del_param(self, param, header='content-type', requote=1):
    """Remove the given parameter completely from the Content-Type header.

    The header will be re-written in place without param or its value.
    All values will be quoted as appropriate unless requote is set to a
    false value.  An alternate header can be named in the header argument;
    nothing happens when the message has no such header.
    """
    if not self.has_key(header):
        return
    new_ctype = ''
    # header has to be passed by keyword: the first positional parameter of
    # get_params() is failobj, so a positional header would be ignored and
    # the parameters of Content-Type: would be read instead.
    for p, v in self.get_params(header=header, unquote=requote):
        if p.lower() != param.lower():
            if not new_ctype:
                new_ctype = _formatparam(p, v, requote)
            else:
                new_ctype = SEMISPACE.join([new_ctype,
                                            _formatparam(p, v, requote)])
    # Only rewrite the header when its value actually changed.
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
|
||||
|
||||
def set_type(self, type, header='Content-Type', requote=1):
    """Set the main type and subtype for the Content-Type: header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type: header, keeping all the
    parameters in place.  If requote is false, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternate header can be specified in the header argument.  When the
    Content-Type: header is set, we'll always also add a MIME-Version:
    header.
    """
    # BAW: should we be strict?
    if not type.count('/') == 1:
        raise ValueError
    # Set the Content-Type: you get a MIME-Version:
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if not self.has_key(header):
        self[header] = type
        return
    # header has to be passed by keyword: the first positional parameter of
    # get_params() is failobj, so a positional header would be ignored and
    # the parameters of Content-Type: would be carried over instead.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
|
||||
|
||||
def get_filename(self, failobj=None):
|
||||
"""Return the filename associated with the payload if present.
|
||||
|
||||
|
|
|
@ -51,9 +51,16 @@ class Parser:
|
|||
lastvalue = []
|
||||
lineno = 0
|
||||
while 1:
|
||||
line = fp.readline()[:-1]
|
||||
if not line or not line.strip():
|
||||
# Don't strip the line before we test for the end condition,
|
||||
# because whitespace-only header lines are RFC compliant
|
||||
# continuation lines.
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
break
|
||||
line = line.splitlines()[0]
|
||||
if not line:
|
||||
break
|
||||
# Ignore the trailing newline
|
||||
lineno += 1
|
||||
# Check for initial Unix From_ line
|
||||
if line.startswith('From '):
|
||||
|
@ -63,7 +70,6 @@ class Parser:
|
|||
else:
|
||||
raise Errors.HeaderParseError(
|
||||
'Unix-from in headers after first rfc822 header')
|
||||
#
|
||||
# Header continuation line
|
||||
if line[0] in ' \t':
|
||||
if not lastheader:
|
||||
|
@ -134,11 +140,11 @@ class Parser:
|
|||
msgobj = self.parsestr(part)
|
||||
container.preamble = preamble
|
||||
container.epilogue = epilogue
|
||||
# Ensure that the container's payload is a list
|
||||
if not isinstance(container.get_payload(), ListType):
|
||||
container.set_payload([msgobj])
|
||||
else:
|
||||
container.add_payload(msgobj)
|
||||
container.attach(msgobj)
|
||||
elif container.get_main_type() == 'multipart':
|
||||
# Very bad. A message is a multipart with no boundary!
|
||||
raise Errors.BoundaryError(
|
||||
'multipart message with no defined boundary')
|
||||
elif container.get_type() == 'message/delivery-status':
|
||||
# This special kind of type contains blocks of headers separated
|
||||
# by a blank line. We'll represent each header block as a
|
||||
|
@ -160,9 +166,9 @@ class Parser:
|
|||
except Errors.HeaderParseError:
|
||||
msg = self._class()
|
||||
self._parsebody(msg, fp)
|
||||
container.add_payload(msg)
|
||||
container.set_payload(msg)
|
||||
else:
|
||||
container.add_payload(fp.read())
|
||||
container.set_payload(fp.read())
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,16 +1,26 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""Miscellaneous utilities.
|
||||
"""
|
||||
|
||||
import time
|
||||
import socket
|
||||
import re
|
||||
import random
|
||||
import os
|
||||
import warnings
|
||||
from cStringIO import StringIO
|
||||
from types import ListType
|
||||
|
||||
from rfc822 import unquote, quote, parseaddr
|
||||
from rfc822 import dump_address_pair
|
||||
from rfc822 import unquote, quote
|
||||
from rfc822 import AddrlistClass as _AddrlistClass
|
||||
from rfc822 import parsedate_tz, parsedate, mktime_tz
|
||||
from rfc822 import mktime_tz
|
||||
|
||||
# We need wormarounds for bugs in these methods in older Pythons (see below)
|
||||
from rfc822 import parsedate as _parsedate
|
||||
from rfc822 import parsedate_tz as _parsedate_tz
|
||||
from rfc822 import parseaddr as _parseaddr
|
||||
|
||||
from quopri import decodestring as _qdecode
|
||||
import base64
|
||||
|
@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode
|
|||
|
||||
COMMASPACE = ', '
|
||||
UEMPTYSTRING = u''
|
||||
CRLF = '\r\n'
|
||||
|
||||
specialsre = re.compile(r'[][\()<>@,:;".]')
|
||||
escapesre = re.compile(r'[][\()"]')
|
||||
|
||||
|
||||
|
||||
|
@ -43,6 +57,41 @@ def _bdecode(s):
|
|||
return value
|
||||
|
||||
|
||||
|
||||
def fix_eols(s):
    """Return s with every line ending normalized to CR LF.

    A line feed that has no carriage return in front of it, and a carriage
    return that has no line feed behind it, are each replaced by CRLF.
    Existing CR LF pairs are left alone.
    """
    # Pass 1: lone line feeds.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Pass 2: lone carriage returns.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
|
||||
|
||||
|
||||
|
||||
def formataddr(pair):
    """Format a (realname, email_address) 2-tuple for an address header.

    This is the inverse of parseaddr(); the result is suitable for an
    RFC 2822 From:, To: or Cc: header.  When the real name is false, the
    address is returned unmodified.  Otherwise the characters matched by
    escapesre are backslash-escaped in the name, and the name is wrapped in
    double quotes if it contains a character matched by specialsre.
    """
    realname, addr = pair
    if not realname:
        return addr
    if specialsre.search(realname):
        dq = '"'
    else:
        dq = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (dq, escaped, dq, addr)
|
||||
|
||||
# For backwards compatibility
|
||||
def dump_address_pair(pair):
    """Deprecated spelling of formataddr().

    Issues a DeprecationWarning attributed to the caller, then returns
    formataddr(pair).
    """
    warnings.warn('Use email.Utils.formataddr() instead',
                  DeprecationWarning, 2)
    formatted = formataddr(pair)
    return formatted
|
||||
|
||||
|
||||
|
||||
def getaddresses(fieldvalues):
|
||||
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
|
||||
|
@ -64,30 +113,26 @@ ecre = re.compile(r'''
|
|||
|
||||
|
||||
def decode(s):
|
||||
"""Return a decoded string according to RFC 2047, as a unicode string."""
|
||||
"""Return a decoded string according to RFC 2047, as a unicode string.
|
||||
|
||||
NOTE: This function is deprecated. Use Header.decode_header() instead.
|
||||
"""
|
||||
warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
|
||||
# Intra-package import here to avoid circular import problems.
|
||||
from Header import decode_header
|
||||
L = decode_header(s)
|
||||
if not isinstance(L, ListType):
|
||||
# s wasn't decoded
|
||||
return s
|
||||
|
||||
rtn = []
|
||||
parts = ecre.split(s, 1)
|
||||
while parts:
|
||||
# If there are less than 4 parts, it can't be encoded and we're done
|
||||
if len(parts) < 5:
|
||||
rtn.extend(parts)
|
||||
break
|
||||
# The first element is any non-encoded leading text
|
||||
rtn.append(parts[0])
|
||||
charset = parts[1]
|
||||
encoding = parts[2].lower()
|
||||
atom = parts[3]
|
||||
# The next chunk to decode should be in parts[4]
|
||||
parts = ecre.split(parts[4])
|
||||
# The encoding must be either `q' or `b', case-insensitive
|
||||
if encoding == 'q':
|
||||
func = _qdecode
|
||||
elif encoding == 'b':
|
||||
func = _bdecode
|
||||
for atom, charset in L:
|
||||
if charset is None:
|
||||
rtn.append(atom)
|
||||
else:
|
||||
func = _identity
|
||||
# Decode and get the unicode in the charset
|
||||
rtn.append(unicode(func(atom), charset))
|
||||
# Convert the string to Unicode using the given encoding. Leave
|
||||
# Unicode conversion errors to strict.
|
||||
rtn.append(unicode(atom, charset))
|
||||
# Now that we've decoded everything, we just need to join all the parts
|
||||
# together into the final string.
|
||||
return UEMPTYSTRING.join(rtn)
|
||||
|
@ -96,6 +141,7 @@ def decode(s):
|
|||
|
||||
def encode(s, charset='iso-8859-1', encoding='q'):
|
||||
"""Encode a string according to RFC 2047."""
|
||||
warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
|
||||
encoding = encoding.lower()
|
||||
if encoding == 'q':
|
||||
estr = _qencode(s)
|
||||
|
@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0):
|
|||
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
|
||||
now[0], now[3], now[4], now[5],
|
||||
zone)
|
||||
|
||||
|
||||
|
||||
def make_msgid(idstring=None):
    """Build a string usable as an RFC 2822 Message-ID:, for example

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The local part is the current UTC time as YYYYMMDDHHMMSS, the process
    id and a random integer below 100000, joined by dots; the domain is
    socket.getfqdn().  If idstring is not None, it is appended to the local
    part after one more dot.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, host)
|
||||
|
||||
|
||||
|
||||
# These functions are in the standalone mimelib version only because they've
|
||||
# subsequently been fixed in the latest Python versions. We use this to worm
|
||||
# around broken older Pythons.
|
||||
def parsedate(data):
    """Parse a date string, tolerating empty input.

    Returns None for any false data; otherwise defers to rfc822.parsedate().
    """
    if data:
        return _parsedate(data)
    return None
|
||||
|
||||
|
||||
def parsedate_tz(data):
    """Parse a date string with time zone, tolerating empty input.

    Returns None for any false data; otherwise defers to
    rfc822.parsedate_tz().
    """
    if data:
        return _parsedate_tz(data)
    return None
|
||||
|
||||
|
||||
def parseaddr(addr):
    """Split an address into a (realname, emailaddr) pair.

    Defers to rfc822.parseaddr(), except that a result of ('', None) is
    reported as ('', '').
    """
    realname, emailaddr = _parseaddr(addr)
    unparseable = realname == '' and emailaddr is None
    if unparseable:
        return '', ''
    return realname, emailaddr
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
# Copyright (C) 2001 Python Software Foundation
|
||||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: barry@zope.com (Barry Warsaw)
|
||||
|
||||
"""A package for parsing, handling, and generating email messages.
|
||||
"""
|
||||
|
||||
__version__ = '1.0'
|
||||
__version__ = '2.0'
|
||||
|
||||
__all__ = ['Encoders',
|
||||
__all__ = ['Charset',
|
||||
'Encoders',
|
||||
'Errors',
|
||||
'Generator',
|
||||
'Header',
|
||||
'Iterators',
|
||||
'MIMEAudio',
|
||||
'MIMEBase',
|
||||
|
@ -18,6 +20,8 @@ __all__ = ['Encoders',
|
|||
'Message',
|
||||
'Parser',
|
||||
'Utils',
|
||||
'base64MIME',
|
||||
'quopriMIME',
|
||||
'message_from_string',
|
||||
'message_from_file',
|
||||
]
|
||||
|
|
174
Lib/email/base64MIME.py
Normal file
174
Lib/email/base64MIME.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
# Copyright (C) 2002 Python Software Foundation
|
||||
# Author: che@debian.org (Ben Gertzfield)
|
||||
|
||||
"""Base64 content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
|
||||
characters encoding known as Base64.
|
||||
|
||||
It is used in the MIME standards for email to attach images, audio, and text
|
||||
using some 8-bit character sets to messages.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with Base64 encoding.
|
||||
|
||||
RFC 2047 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character conversion
|
||||
necessary for proper internationalized headers; it only does dumb encoding and
|
||||
decoding. To deal with the various line wrapping issues, use the email.Header
|
||||
module.
|
||||
"""
|
||||
|
||||
import re
|
||||
from binascii import b2a_base64, a2b_base64
|
||||
from email.Utils import fix_eols
|
||||
|
||||
CRLF = '\r\n'
|
||||
NL = '\n'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
# See also Charset.py
|
||||
MISC_LEN = 7
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def base64_len(s):
    """Return the number of characters s occupies once base64-encoded."""
    # Every started group of three input bytes costs four output
    # characters, so round the group count up before scaling.
    full_or_partial_groups = (len(s) + 2) // 3
    return full_or_partial_groups * 4
|
||||
|
||||
|
||||
|
||||
def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2047, the `B' header encoding is identical to normal
    Base64 encoding, except that each line must be intelligently wrapped
    (respecting the Base64 encoding), and subsequent lines must start with a
    space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded chunk (including its =?charset?b?...?= chrome) taking
    at most maxlinelen characters (defaults to 76 characters).  If
    maxlinelen leaves no room for even one four-character Base64 group, one
    group per chunk is emitted anyway so that no header text is lost.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left for Base64 text in each chunk once the RFC chrome is
    # accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 maps every 3 input bytes to 4 output characters and pads a
    # short final group up to a full 4, so only a whole number of 3-byte
    # groups is guaranteed to fit into max_encoded characters.  Always take
    # at least one group so that the loop below has a positive step.
    max_unencoded = max(max_encoded // 4, 1) * 3

    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # Ignore the newline that b2a_base64() puts at the end
        if chunk[-1] == NL:
            chunk = chunk[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
|
||||
|
||||
|
||||
|
||||
def encode(s, binary=1, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Lines are always cut on a four-character Base64 group
    boundary, so padding can only appear at the very end of the output; if
    maxlinelen is smaller than one group, one group per line is emitted.

    If binary is false, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    encvec = []
    # Encode whole 3-byte groups per line only.  A line whose input length is
    # not a multiple of 3 would end in `=' padding in the middle of the
    # stream and could come out longer than maxlinelen.  Keep at least one
    # group so that range() gets a positive step.
    max_unencoded = max(maxlinelen // 4, 1) * 3
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        if enc[-1] == NL and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_encode = encode
|
||||
encodestring = encode
|
||||
|
||||
|
||||
|
||||
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    A false s is returned as is.  If convert_eols is set to a string value,
    every canonical email line ending ("\\r\\n") in the decoded text is
    replaced by that value; os.linesep is a good choice when decoding a
    text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if not s:
        return s

    decoded = a2b_base64(s)
    if not convert_eols:
        return decoded
    return decoded.replace(CRLF, convert_eols)
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_decode = decode
|
||||
decodestring = decode
|
312
Lib/email/quopriMIME.py
Normal file
312
Lib/email/quopriMIME.py
Normal file
|
@ -0,0 +1,312 @@
|
|||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# Author: che@debian.org (Ben Gertzfield)
|
||||
|
||||
"""Quoted-printable content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
|
||||
safely encode text that is in a character set similar to the 7-bit US ASCII
|
||||
character set, but that includes some 8-bit characters that are normally not
|
||||
allowed in email bodies or headers.
|
||||
|
||||
Quoted-printable is very space-inefficient for encoding binary files; use the
|
||||
email.base64MIME module for that instead.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with quoted-printable encoding.
|
||||
|
||||
RFC 2047 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:/From:/Cc: etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character
|
||||
conversion necessary for proper internationalized headers; it only
|
||||
does dumb encoding and decoding. To deal with the various line
|
||||
wrapping issues, use the email.Header module.
|
||||
"""
|
||||
|
||||
import re
|
||||
from string import hexdigits
|
||||
from email.Utils import fix_eols
|
||||
|
||||
CRLF = '\r\n'
|
||||
NL = '\n'
|
||||
|
||||
# See also Charset.py
|
||||
MISC_LEN = 7
|
||||
|
||||
hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
|
||||
bqre = re.compile(r'[^ !-<>-~\t]')
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def header_quopri_check(c):
    """Tell whether c must be escaped in header quopri.

    Returns 1 when c matches hqre (i.e. is not one of the characters that
    may appear verbatim), and None otherwise.
    """
    if hqre.match(c):
        return 1
    return None
|
||||
|
||||
|
||||
def body_quopri_check(c):
    """Tell whether c must be escaped in body quopri.

    Returns 1 when c matches bqre (i.e. is not one of the characters that
    may appear verbatim), and None otherwise.
    """
    if bqre.match(c):
        return 1
    return None
|
||||
|
||||
|
||||
def header_quopri_len(s):
    """Return the length of s when it is encoded with header quopri."""
    # A character matched by hqre is written as `=XX', which is two
    # characters more than the one it replaces.
    escaped = [c for c in s if hqre.match(c)]
    return len(s) + 2 * len(escaped)
|
||||
|
||||
|
||||
def body_quopri_len(str):
    """Return the length of str when it is encoded with body quopri."""
    # A character matched by bqre is written as `=XX', which is two
    # characters more than the one it replaces.
    escaped = [c for c in str if bqre.match(c)]
    return len(str) + 2 * len(escaped)
|
||||
|
||||
|
||||
def _max_append(L, s, maxlen, extra=''):
|
||||
if not L:
|
||||
L.append(s)
|
||||
elif len(L[-1]) + len(s) < maxlen:
|
||||
L[-1] += extra + s
|
||||
else:
|
||||
L.append(s)
|
||||
|
||||
|
||||
def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab"""
    # Characters 1 and 2 are the hex digits; the leading `=' is skipped and
    # anything after the third character is ignored.
    hexpair = s[1:3]
    return chr(int(hexpair, 16))
|
||||
|
||||
|
||||
def quote(c):
    """Return the character c in the form =XX, XX being its value in
    two-digit upper case hex."""
    code = ord(c)
    return "=%02X" % (code,)
|
||||
|
||||
|
||||
|
||||
def header_encode(header, charset="iso-8859-1", keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in mail
    clients that are not aware of it.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    The resulting string will be in the form:

    "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?=\\n
      =?charset?q?Silly_=C8nglish_Kn=EEghts?="

    with the text split into chunks by _max_append() against a budget of
    maxlinelen (defaults to 76 characters) less the length of the
    =?charset?q?...?= chrome.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Budget for the encoded text of one chunk, i.e. the line length less
    # the RFC chrome that is wrapped around it further down.
    max_encoded = maxlinelen - len(charset) - MISC_LEN

    quoted = []
    for c in header:
        if c == ' ':
            # Space may be represented as _ instead of =20 for readability
            piece = '_'
        elif hqre.match(c):
            # Needs escaping: replace with hex value like =E2
            piece = "=%02X" % ord(c)
        else:
            # These characters can be included verbatim
            piece = c
        _max_append(quoted, piece, max_encoded)

    # Now add the RFC chrome to each encoded chunk and glue the chunks
    # together.  BAW: should we be able to specify the leading whitespace in
    # the joiner?
    wrapped = ['=?%s?q?%s?=' % (charset, line) for line in quoted]
    joiner = eol + ' '
    return joiner.join(wrapped)
|
||||
|
||||
|
||||
|
||||
def encode(body, binary=0, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    If binary is false (the default), end-of-line characters will be converted
    to the canonical email end-of-line sequence \\r\\n.  Otherwise they will
    be left verbatim.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Long lines will have the `soft linefeed' quoted-printable
    character "=" appended to them, so the decoded text will be identical to
    the original text.

    An empty (false) body is returned unchanged.
    """
    if not body:
        return body

    if not binary:
        body = fix_eols(body)

    # Finished output fragments are collected in a list and joined once at
    # the end; growing a single string with += is quadratic in the body size.
    chunks = []
    # Preserve line endings here so we can tell, for each line, whether an
    # eol needs to be added to the output after it.
    lines = body.splitlines(1)
    lastlineno = len(lines) - 1
    lineno = -1
    for line in lines:
        lineno += 1
        # Strip off the line ending for processing this line, remembering
        # whether there was one.
        if line.endswith(CRLF):
            line = line[:-2]
            has_eol = 1
        elif line[-1] in CRLF:
            line = line[:-1]
            has_eol = 1
        else:
            has_eol = 0

        # encoded_line is flushed whenever it approaches maxlinelen, so it
        # stays short and plain concatenation is fine for it.
        encoded_line = ''
        prev = None
        linelen = len(line)
        # Examine every character to see if it needs to be quopri encoded.
        for j in range(linelen):
            c = line[j]
            prev = c
            if bqre.match(c):
                c = quote(c)
            elif j+1 == linelen:
                # Last character of the line.  Trailing whitespace is not
                # added here; it is held in prev and handled after the loop.
                if c not in ' \t':
                    encoded_line += c
                continue
            # Check to see if the line has reached its maximum length; if so
            # flush it with a soft line break.
            if len(encoded_line) + len(c) >= maxlinelen:
                chunks.append(encoded_line + '=' + eol)
                encoded_line = ''
            encoded_line += c
        # Now at end of line..
        if prev and prev in ' \t':
            if lineno == lastlineno:
                # Special case for whitespace at end of file: quote it.
                prev = quote(prev)
                if len(encoded_line) + len(prev) > maxlinelen:
                    chunks.append(encoded_line + '=' + eol + prev)
                else:
                    chunks.append(encoded_line + prev)
            else:
                # Just normal whitespace at end of line: follow it with a
                # soft line break.
                chunks.append(encoded_line + prev + '=' + eol)
            encoded_line = ''
        # If the line we just finished had a line ending, we need to add eol
        # to the end of the encoded line.
        if has_eol:
            chunks.append(encoded_line + eol)
        else:
            chunks.append(encoded_line)
    return ''.join(chunks)
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_encode = encode
|
||||
encodestring = encode
|
||||
|
||||
|
||||
|
||||
# BAW: I'm not sure if the intent was for the signature of this function to be
|
||||
# the same as base64MIME.decode() or not...
|
||||
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace is stripped from every input line.  A line whose last
    character is "=" is a soft line break: no eol is emitted after it.  An
    "=" followed by two hexadecimal digits is passed to unquote(); any other
    "=" is copied literally.  An empty (false) argument is returned unchanged.
    """
    if not encoded:
        return encoded
    # Decoded fragments are collected in a list and joined once at the end;
    # building the result with += is quadratic in the input size.
    parts = []

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            parts.append(eol)
            continue

        i = 0
        n = len(line)
        soft_break = 0
        while i < n:
            c = line[i]
            if c != '=':
                parts.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # this is a soft line break.
            elif i+1 == n:
                soft_break = 1
                i += 1
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                parts.append(unquote(line[i:i+3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                parts.append(c)
                i += 1
        # Every line except one ending in a soft line break is terminated.
        if not soft_break:
            parts.append(eol)

    decoded = ''.join(parts)
    # Special case if original string did not end with a line ending: drop
    # the eol added after its last line.  Testing the last input character
    # against the characters of eol and trimming len(eol) characters keeps
    # this working for a multi-character eol such as "\r\n"; endswith() is
    # also safe when nothing was decoded at all (e.g. the input "=").
    if eol and encoded[-1] not in eol and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_decode = decode
|
||||
decodestring = decode
|
||||
|
||||
|
||||
|
||||
def _unquote_match(match):
    """re.sub() callback: return unquote() applied to the whole matched text.

    The matched text is expected to be in the form =AB.
    """
    return unquote(match.group(0))
|
||||
|
||||
|
||||
# Header decoding is done a bit differently
|
||||
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces, and every "=" followed by two
    hexadecimal digits is replaced by what unquote() returns for it.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.Header class for that functionality.
    """
    s = s.replace('_', ' ')
    # Only "=" plus two hex digits is an escape, matching what decode()
    # accepts.  Other sequences such as "=zz" are left untouched instead of
    # being handed to unquote(), which presumably cannot interpret them.
    return re.sub(r'=[0-9a-fA-F]{2}', _unquote_match, s)
|
10
Lib/test/data/msg_24.txt
Normal file
10
Lib/test/data/msg_24.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
Content-Type: multipart/mixed; boundary="BOUNDARY"
|
||||
MIME-Version: 1.0
|
||||
Subject: A subject
|
||||
To: aperson@dom.ain
|
||||
From: bperson@dom.ain
|
||||
|
||||
--BOUNDARY
|
||||
|
||||
|
||||
--BOUNDARY--
|
117
Lib/test/data/msg_25.txt
Normal file
117
Lib/test/data/msg_25.txt
Normal file
|
@ -0,0 +1,117 @@
|
|||
From MAILER-DAEMON Fri Apr 06 16:46:09 2001
|
||||
Received: from [204.245.199.98] (helo=zinfandel.lacita.com)
|
||||
by www.linux.org.uk with esmtp (Exim 3.13 #1)
|
||||
id 14lYR6-0008Iv-00
|
||||
for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
|
||||
Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
|
||||
Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
|
||||
From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
|
||||
Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
|
||||
Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
|
||||
To: <linuxuser-admin@www.linux.org.uk>
|
||||
To: postmaster@zinfandel.lacita.com
|
||||
MIME-Version: 1.0
|
||||
Content-Type: multipart/report; report-type=delivery-status;
|
||||
bo
|
||||
Auto-Submitted: auto-generated (failure)
|
||||
|
||||
This is a MIME-encapsulated message
|
||||
|
||||
--JAB03225.986577786/zinfandel.lacita.com
|
||||
|
||||
The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
|
||||
from [199.164.235.226]
|
||||
|
||||
----- The following addresses have delivery notifications -----
|
||||
<scoffman@wellpartner.com> (unrecoverable error)
|
||||
|
||||
----- Transcript of session follows -----
|
||||
554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
|
||||
|
||||
--JAB03225.986577786/zinfandel.lacita.com
|
||||
Content-Type: message/delivery-status
|
||||
|
||||
Reporting-MTA: dns; zinfandel.lacita.com
|
||||
Received-From-MTA: dns; [199.164.235.226]
|
||||
Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
|
||||
|
||||
Final-Recipient: rfc822; scoffman@wellpartner.com
|
||||
Action: failed
|
||||
Status: 5.4.6
|
||||
Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
|
||||
|
||||
--JAB03225.986577786/zinfandel.lacita.com
|
||||
Content-Type: text/rfc822-headers
|
||||
|
||||
Return-Path: linuxuser-admin@www.linux.org.uk
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
|
||||
Received: from zinfandel.lacita.com ([204.245.199.98])
|
||||
by
|
||||
fo
|
||||
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
|
||||
Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252])
|
||||
by
|
||||
fo
|
||||
Received: from localhost.localdomain
|
||||
([
|
||||
by
|
||||
id
|
||||
Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root)
|
||||
by
|
||||
id
|
||||
fo
|
||||
Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22])
|
||||
by
|
||||
fo
|
||||
From: Daniel James <daniel@linuxuser.co.uk>
|
||||
Organization: LinuxUser
|
||||
To: linuxuser@www.linux.org.uk
|
||||
X-Mailer: KMail [version 1.1.99]
|
||||
Content-Type: text/plain;
|
||||
c
|
||||
MIME-Version: 1.0
|
||||
Message-Id: <01040616033903.00962@server>
|
||||
Content-Transfer-Encoding: 8bit
|
||||
Subject: [LinuxUser] bulletin no. 45
|
||||
Sender: linuxuser-admin@www.linux.org.uk
|
||||
Errors-To: linuxuser-admin@www.linux.org.uk
|
||||
X-BeenThere: linuxuser@www.linux.org.uk
|
||||
X-Mailman-Version: 2.0.3
|
||||
Precedence: bulk
|
||||
List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
|
||||
List-Post: <mailto:linuxuser@www.linux.org.uk>
|
||||
List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
|
||||
<m
|
||||
List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
|
||||
List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
|
||||
<m
|
||||
List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
|
||||
Date: Fri, 6 Apr 2001 16:03:39 +0100
|
||||
|
||||
--JAB03225.986577786/zinfandel.lacita.com--
|
||||
|
||||
|
|
@ -1,15 +1,19 @@
|
|||
# Copyright (C) 2001,2002 Python Software Foundation
|
||||
# email package unit tests
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import unittest
|
||||
import base64
|
||||
from cStringIO import StringIO
|
||||
from types import StringType
|
||||
import warnings
|
||||
|
||||
import email
|
||||
|
||||
from email.Charset import Charset
|
||||
from email.Header import Header, decode_header
|
||||
from email.Parser import Parser, HeaderParser
|
||||
from email.Generator import Generator, DecodedGenerator
|
||||
from email.Message import Message
|
||||
|
@ -22,14 +26,18 @@ from email import Utils
|
|||
from email import Errors
|
||||
from email import Encoders
|
||||
from email import Iterators
|
||||
from email import base64MIME
|
||||
from email import quopriMIME
|
||||
|
||||
from test_support import findfile, __file__ as test_support_file
|
||||
|
||||
|
||||
NL = '\n'
|
||||
EMPTYSTRING = ''
|
||||
SPACE = ' '
|
||||
|
||||
# We don't care about DeprecationWarnings
|
||||
warnings.filterwarnings('ignore', '', DeprecationWarning, __name__)
|
||||
|
||||
|
||||
|
||||
def openfile(filename):
|
||||
|
@ -41,7 +49,7 @@ def openfile(filename):
|
|||
# Base test class
|
||||
class TestEmailBase(unittest.TestCase):
|
||||
def _msgobj(self, filename):
|
||||
fp = openfile(filename)
|
||||
fp = openfile(findfile(filename))
|
||||
try:
|
||||
msg = email.message_from_file(fp)
|
||||
finally:
|
||||
|
@ -58,6 +66,45 @@ class TestMessageAPI(TestEmailBase):
|
|||
eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
|
||||
eq(msg.get_all('xx', 'n/a'), 'n/a')
|
||||
|
||||
def test_getset_charset(self):
|
||||
eq = self.assertEqual
|
||||
msg = Message()
|
||||
eq(msg.get_charset(), None)
|
||||
charset = Charset('iso-8859-1')
|
||||
msg.set_charset(charset)
|
||||
eq(msg['mime-version'], '1.0')
|
||||
eq(msg.get_type(), 'text/plain')
|
||||
eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
|
||||
eq(msg.get_param('charset'), 'iso-8859-1')
|
||||
eq(msg['content-transfer-encoding'], 'quoted-printable')
|
||||
eq(msg.get_charset().input_charset, 'iso-8859-1')
|
||||
# Remove the charset
|
||||
msg.set_charset(None)
|
||||
eq(msg.get_charset(), None)
|
||||
eq(msg['content-type'], 'text/plain')
|
||||
# Try adding a charset when there's already MIME headers present
|
||||
msg = Message()
|
||||
msg['MIME-Version'] = '2.0'
|
||||
msg['Content-Type'] = 'text/x-weird'
|
||||
msg['Content-Transfer-Encoding'] = 'quinted-puntable'
|
||||
msg.set_charset(charset)
|
||||
eq(msg['mime-version'], '2.0')
|
||||
eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
|
||||
eq(msg['content-transfer-encoding'], 'quinted-puntable')
|
||||
|
||||
def test_set_charset_from_string(self):
|
||||
eq = self.assertEqual
|
||||
msg = Message()
|
||||
msg.set_charset('us-ascii')
|
||||
eq(msg.get_charset().input_charset, 'us-ascii')
|
||||
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||
|
||||
def test_set_payload_with_charset(self):
|
||||
msg = Message()
|
||||
charset = Charset('iso-8859-1')
|
||||
msg.set_payload('This is a string payload', charset)
|
||||
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
|
||||
|
||||
def test_get_charsets(self):
|
||||
eq = self.assertEqual
|
||||
|
||||
|
@ -204,6 +251,11 @@ class TestMessageAPI(TestEmailBase):
|
|||
eq(msg.get_params(header='x-header'),
|
||||
[('foo', ''), ('bar', 'one'), ('baz', 'two')])
|
||||
|
||||
def test_get_param_liberal(self):
|
||||
msg = Message()
|
||||
msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
|
||||
self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
|
||||
|
||||
def test_get_param(self):
|
||||
eq = self.assertEqual
|
||||
msg = email.message_from_string(
|
||||
|
@ -216,6 +268,10 @@ class TestMessageAPI(TestEmailBase):
|
|||
eq(msg.get_param('foo', header='x-header'), '')
|
||||
eq(msg.get_param('bar', header='x-header'), 'one')
|
||||
eq(msg.get_param('baz', header='x-header'), 'two')
|
||||
# XXX: We are not RFC-2045 compliant! We cannot parse:
|
||||
# msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
|
||||
# msg.get_param("weird")
|
||||
# yet.
|
||||
|
||||
def test_get_param_funky_continuation_lines(self):
|
||||
msg = self._msgobj('msg_22.txt')
|
||||
|
@ -228,7 +284,52 @@ class TestMessageAPI(TestEmailBase):
|
|||
self.failUnless(msg.has_key('HEADER'))
|
||||
self.failIf(msg.has_key('headeri'))
|
||||
|
||||
def test_set_param(self):
|
||||
eq = self.assertEqual
|
||||
msg = Message()
|
||||
msg.set_param('charset', 'iso-2022-jp')
|
||||
eq(msg.get_param('charset'), 'iso-2022-jp')
|
||||
msg.set_param('importance', 'high value')
|
||||
eq(msg.get_param('importance'), 'high value')
|
||||
eq(msg.get_param('importance', unquote=0), '"high value"')
|
||||
eq(msg.get_params(), [('text/plain', ''),
|
||||
('charset', 'iso-2022-jp'),
|
||||
('importance', 'high value')])
|
||||
eq(msg.get_params(unquote=0), [('text/plain', ''),
|
||||
('charset', '"iso-2022-jp"'),
|
||||
('importance', '"high value"')])
|
||||
msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
|
||||
eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
|
||||
|
||||
def test_del_param(self):
|
||||
eq = self.assertEqual
|
||||
msg = self._msgobj('msg_05.txt')
|
||||
eq(msg.get_params(),
|
||||
[('multipart/report', ''), ('report-type', 'delivery-status'),
|
||||
('boundary', 'D1690A7AC1.996856090/mail.example.com')])
|
||||
old_val = msg.get_param("report-type")
|
||||
msg.del_param("report-type")
|
||||
eq(msg.get_params(),
|
||||
[('multipart/report', ''),
|
||||
('boundary', 'D1690A7AC1.996856090/mail.example.com')])
|
||||
msg.set_param("report-type", old_val)
|
||||
eq(msg.get_params(),
|
||||
[('multipart/report', ''),
|
||||
('boundary', 'D1690A7AC1.996856090/mail.example.com'),
|
||||
('report-type', old_val)])
|
||||
|
||||
def test_set_type(self):
|
||||
eq = self.assertEqual
|
||||
msg = Message()
|
||||
self.assertRaises(ValueError, msg.set_type, 'text')
|
||||
msg.set_type('text/plain')
|
||||
eq(msg['content-type'], 'text/plain')
|
||||
msg.set_param('charset', 'us-ascii')
|
||||
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||
msg.set_type('text/html')
|
||||
eq(msg['content-type'], 'text/html; charset="us-ascii"')
|
||||
|
||||
|
||||
|
||||
# Test the email.Encoders module
|
||||
class TestEncoders(unittest.TestCase):
|
||||
|
@ -236,7 +337,6 @@ class TestEncoders(unittest.TestCase):
|
|||
eq = self.assertEqual
|
||||
msg = MIMEText('hello world', _encoder=Encoders.encode_noop)
|
||||
eq(msg.get_payload(), 'hello world\n')
|
||||
eq(msg['content-transfer-encoding'], None)
|
||||
|
||||
def test_encode_7bit(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -253,6 +353,12 @@ class TestEncoders(unittest.TestCase):
|
|||
eq(msg.get_payload(), 'hello \x80 world\n')
|
||||
eq(msg['content-transfer-encoding'], '8bit')
|
||||
|
||||
def test_encode_empty_payload(self):
|
||||
eq = self.assertEqual
|
||||
msg = Message()
|
||||
msg.set_charset('us-ascii')
|
||||
eq(msg['content-transfer-encoding'], '7bit')
|
||||
|
||||
def test_encode_base64(self):
|
||||
eq = self.assertEqual
|
||||
msg = MIMEText('hello world', _encoder=Encoders.encode_base64)
|
||||
|
@ -265,6 +371,23 @@ class TestEncoders(unittest.TestCase):
|
|||
eq(msg.get_payload(), 'hello=20world\n')
|
||||
eq(msg['content-transfer-encoding'], 'quoted-printable')
|
||||
|
||||
def test_default_cte(self):
    # Check the Content-Transfer-Encoding header MIMEText produces by
    # default.  (This method used to be defined twice in a row; the first
    # definition was shadowed by the second and its only assertion is
    # repeated below, so a single definition is kept.)
    eq = self.assertEqual
    # With no explicit _charset it's us-ascii, and all are 7-bit
    msg = MIMEText('hello world')
    eq(msg['content-transfer-encoding'], '7bit')
    # Similar, but with 8-bit data
    msg = MIMEText('hello \xf8 world')
    eq(msg['content-transfer-encoding'], '8bit')
    # And now with a different charset
    msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
    eq(msg['content-transfer-encoding'], 'quoted-printable')
|
||||
|
||||
|
||||
|
||||
# Test long header wrapping
|
||||
|
@ -279,7 +402,14 @@ class TestLongHeaders(unittest.TestCase):
|
|||
sfp = StringIO()
|
||||
g = Generator(sfp)
|
||||
g(msg)
|
||||
self.assertEqual(sfp.getvalue(), openfile('msg_18.txt').read())
|
||||
self.assertEqual(sfp.getvalue(), '''\
|
||||
Content-Type: text/plain; charset="us-ascii"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
|
||||
spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
|
||||
|
||||
''')
|
||||
|
||||
def test_no_semis_header_splitter(self):
|
||||
msg = Message()
|
||||
|
@ -314,6 +444,30 @@ References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
|||
|
||||
Test""")
|
||||
|
||||
def test_splitting_multiple_long_lines(self):
|
||||
msg = Message()
|
||||
msg['Received'] = """\
|
||||
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
"""
|
||||
self.assertEqual(msg.as_string(), """\
|
||||
Received: from babylon.socal-raves.org (localhost [127.0.0.1]);
|
||||
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
|
||||
for <mailman-admin@babylon.socal-raves.org>;
|
||||
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
from babylon.socal-raves.org (localhost [127.0.0.1]);
|
||||
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
|
||||
for <mailman-admin@babylon.socal-raves.org>;
|
||||
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
from babylon.socal-raves.org (localhost [127.0.0.1]);
|
||||
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
|
||||
for <mailman-admin@babylon.socal-raves.org>;
|
||||
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
|
||||
|
||||
|
||||
""")
|
||||
|
||||
|
||||
|
||||
# Test mangling of "From " lines in the body of a message
|
||||
|
@ -476,6 +630,12 @@ class TestMIMEText(unittest.TestCase):
|
|||
self.assertEqual(self._msg.get_payload(), 'hello there\n')
|
||||
self.failUnless(not self._msg.is_multipart())
|
||||
|
||||
def test_charset(self):
|
||||
eq = self.assertEqual
|
||||
msg = MIMEText('hello there', _charset='us-ascii')
|
||||
eq(msg.get_charset().input_charset, 'us-ascii')
|
||||
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||
|
||||
|
||||
|
||||
# Test a more complicated multipart/mixed type message
|
||||
|
@ -539,6 +699,82 @@ This is the dingus fish.
|
|||
unless(not m0.is_multipart())
|
||||
unless(not m1.is_multipart())
|
||||
|
||||
def test_no_parts_in_a_multipart(self):
|
||||
outer = MIMEBase('multipart', 'mixed')
|
||||
outer['Subject'] = 'A subject'
|
||||
outer['To'] = 'aperson@dom.ain'
|
||||
outer['From'] = 'bperson@dom.ain'
|
||||
outer.preamble = ''
|
||||
outer.epilogue = ''
|
||||
outer.set_boundary('BOUNDARY')
|
||||
msg = MIMEText('hello world')
|
||||
self.assertEqual(outer.as_string(), '''\
|
||||
Content-Type: multipart/mixed; boundary="BOUNDARY"
|
||||
MIME-Version: 1.0
|
||||
Subject: A subject
|
||||
To: aperson@dom.ain
|
||||
From: bperson@dom.ain
|
||||
|
||||
--BOUNDARY
|
||||
|
||||
|
||||
--BOUNDARY--
|
||||
''')
|
||||
|
||||
def test_one_part_in_a_multipart(self):
|
||||
outer = MIMEBase('multipart', 'mixed')
|
||||
outer['Subject'] = 'A subject'
|
||||
outer['To'] = 'aperson@dom.ain'
|
||||
outer['From'] = 'bperson@dom.ain'
|
||||
outer.preamble = ''
|
||||
outer.epilogue = ''
|
||||
outer.set_boundary('BOUNDARY')
|
||||
msg = MIMEText('hello world')
|
||||
outer.attach(msg)
|
||||
self.assertEqual(outer.as_string(), '''\
|
||||
Content-Type: multipart/mixed; boundary="BOUNDARY"
|
||||
MIME-Version: 1.0
|
||||
Subject: A subject
|
||||
To: aperson@dom.ain
|
||||
From: bperson@dom.ain
|
||||
|
||||
--BOUNDARY
|
||||
Content-Type: text/plain; charset="us-ascii"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
hello world
|
||||
|
||||
--BOUNDARY--
|
||||
''')
|
||||
|
||||
def test_seq_parts_in_a_multipart(self):
|
||||
outer = MIMEBase('multipart', 'mixed')
|
||||
outer['Subject'] = 'A subject'
|
||||
outer['To'] = 'aperson@dom.ain'
|
||||
outer['From'] = 'bperson@dom.ain'
|
||||
outer.preamble = ''
|
||||
outer.epilogue = ''
|
||||
msg = MIMEText('hello world')
|
||||
outer.attach(msg)
|
||||
outer.set_boundary('BOUNDARY')
|
||||
self.assertEqual(outer.as_string(), '''\
|
||||
Content-Type: multipart/mixed; boundary="BOUNDARY"
|
||||
MIME-Version: 1.0
|
||||
Subject: A subject
|
||||
To: aperson@dom.ain
|
||||
From: bperson@dom.ain
|
||||
|
||||
--BOUNDARY
|
||||
Content-Type: text/plain; charset="us-ascii"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
hello world
|
||||
|
||||
--BOUNDARY--
|
||||
''')
|
||||
|
||||
|
||||
|
||||
# Test some badly formatted messages
|
||||
|
@ -551,7 +787,7 @@ class TestNonConformant(TestEmailBase):
|
|||
self.failUnless(msg.get_subtype() is None)
|
||||
|
||||
def test_bogus_boundary(self):
|
||||
fp = openfile('msg_15.txt')
|
||||
fp = openfile(findfile('msg_15.txt'))
|
||||
try:
|
||||
data = fp.read()
|
||||
finally:
|
||||
|
@ -561,6 +797,10 @@ class TestNonConformant(TestEmailBase):
|
|||
# message into the intended message tree.
|
||||
self.assertRaises(Errors.BoundaryError, p.parsestr, data)
|
||||
|
||||
def test_multipart_no_boundary(self):
|
||||
fp = openfile(findfile('msg_25.txt'))
|
||||
self.assertRaises(Errors.BoundaryError, email.message_from_file, fp)
|
||||
|
||||
|
||||
|
||||
# Test RFC 2047 header encoding and decoding
|
||||
|
@ -570,7 +810,7 @@ class TestRFC2047(unittest.TestCase):
|
|||
s = '=?iso-8859-1?q?this=20is=20some=20text?='
|
||||
eq(Utils.decode(s), 'this is some text')
|
||||
s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?='
|
||||
eq(Utils.decode(s), u'Keld_J\xf8rn_Simonsen')
|
||||
eq(Utils.decode(s), u'Keld J\xf8rn Simonsen')
|
||||
s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \
|
||||
'=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
|
||||
eq(Utils.decode(s), 'If you can read this you understand the example.')
|
||||
|
@ -578,6 +818,8 @@ class TestRFC2047(unittest.TestCase):
|
|||
eq(Utils.decode(s),
|
||||
u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0')
|
||||
s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?='
|
||||
eq(Utils.decode(s), u'this issome text')
|
||||
s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?='
|
||||
eq(Utils.decode(s), u'this is some text')
|
||||
|
||||
def test_encode_header(self):
|
||||
|
@ -794,6 +1036,10 @@ class TestIdempotent(unittest.TestCase):
|
|||
msg, text = self._msgobj('msg_23.txt')
|
||||
self._idempotent(msg, text)
|
||||
|
||||
def test_multipart_no_parts(self):
|
||||
msg, text = self._msgobj('msg_24.txt')
|
||||
self._idempotent(msg, text)
|
||||
|
||||
def test_content_type(self):
|
||||
eq = self.assertEquals
|
||||
# Get a message object and reset the seek pointer for other tests
|
||||
|
@ -835,7 +1081,6 @@ class TestIdempotent(unittest.TestCase):
|
|||
self.failUnless(isinstance(msg1.get_payload(), StringType))
|
||||
eq(msg1.get_payload(), '\n')
|
||||
|
||||
|
||||
|
||||
# Test various other bits of the package's functionality
|
||||
class TestMiscellaneous(unittest.TestCase):
|
||||
|
@ -916,49 +1161,77 @@ class TestMiscellaneous(unittest.TestCase):
|
|||
module = __import__('email')
|
||||
all = module.__all__
|
||||
all.sort()
|
||||
self.assertEqual(all, ['Encoders', 'Errors', 'Generator', 'Iterators',
|
||||
'MIMEAudio', 'MIMEBase', 'MIMEImage',
|
||||
'MIMEMessage', 'MIMEText', 'Message', 'Parser',
|
||||
'Utils',
|
||||
'message_from_file', 'message_from_string'])
|
||||
self.assertEqual(all, ['Charset', 'Encoders', 'Errors', 'Generator',
|
||||
'Header', 'Iterators', 'MIMEAudio',
|
||||
'MIMEBase', 'MIMEImage', 'MIMEMessage',
|
||||
'MIMEText', 'Message', 'Parser',
|
||||
'Utils', 'base64MIME',
|
||||
'message_from_file', 'message_from_string',
|
||||
'quopriMIME'])
|
||||
|
||||
def test_formatdate(self):
|
||||
now = 1005327232.109884
|
||||
gm_epoch = time.gmtime(0)[0:3]
|
||||
loc_epoch = time.localtime(0)[0:3]
|
||||
# When does the epoch start?
|
||||
if gm_epoch == (1970, 1, 1):
|
||||
# traditional Unix epoch
|
||||
matchdate = 'Fri, 09 Nov 2001 17:33:52 -0000'
|
||||
elif loc_epoch == (1904, 1, 1):
|
||||
# Mac epoch
|
||||
matchdate = 'Sat, 09 Nov 1935 16:33:52 -0000'
|
||||
else:
|
||||
matchdate = "I don't understand your epoch"
|
||||
gdate = Utils.formatdate(now)
|
||||
self.assertEqual(gdate, matchdate)
|
||||
now = time.time()
|
||||
self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6],
|
||||
time.gmtime(now)[:6])
|
||||
|
||||
def test_formatdate_localtime(self):
|
||||
now = 1005327232.109884
|
||||
ldate = Utils.formatdate(now, localtime=1)
|
||||
zone = ldate.split()[5]
|
||||
offset = int(zone[1:3]) * 3600 + int(zone[-2:]) * 60
|
||||
# Remember offset is in seconds west of UTC, but the timezone is in
|
||||
# minutes east of UTC, so the signs differ.
|
||||
if zone[0] == '+':
|
||||
offset = -offset
|
||||
if time.daylight and time.localtime(now)[-1]:
|
||||
toff = time.altzone
|
||||
else:
|
||||
toff = time.timezone
|
||||
self.assertEqual(offset, toff)
|
||||
now = time.time()
|
||||
self.assertEqual(
|
||||
Utils.parsedate(Utils.formatdate(now, localtime=1))[:6],
|
||||
time.localtime(now)[:6])
|
||||
|
||||
def test_parsedate_none(self):
|
||||
self.assertEqual(Utils.parsedate(''), None)
|
||||
|
||||
def test_parseaddr_empty(self):
|
||||
self.assertEqual(Utils.parseaddr('<>'), ('', ''))
|
||||
self.assertEqual(Utils.dump_address_pair(Utils.parseaddr('<>')), '')
|
||||
self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
|
||||
|
||||
def test_noquote_dump(self):
|
||||
self.assertEqual(
|
||||
Utils.formataddr(('A Silly Person', 'person@dom.ain')),
|
||||
'A Silly Person <person@dom.ain>')
|
||||
|
||||
def test_escape_dump(self):
|
||||
self.assertEqual(
|
||||
Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
|
||||
r'"A \(Very\) Silly Person" <person@dom.ain>')
|
||||
a = r'A \(Special\) Person'
|
||||
b = 'person@dom.ain'
|
||||
self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
|
||||
|
||||
def test_quote_dump(self):
|
||||
self.assertEqual(
|
||||
Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
|
||||
r'"A Silly; Person" <person@dom.ain>')
|
||||
|
||||
def test_fix_eols(self):
|
||||
eq = self.assertEqual
|
||||
eq(Utils.fix_eols('hello'), 'hello')
|
||||
eq(Utils.fix_eols('hello\n'), 'hello\r\n')
|
||||
eq(Utils.fix_eols('hello\r'), 'hello\r\n')
|
||||
eq(Utils.fix_eols('hello\r\n'), 'hello\r\n')
|
||||
eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
|
||||
|
||||
def test_charset_richcomparisons(self):
|
||||
eq = self.assertEqual
|
||||
ne = self.failIfEqual
|
||||
cset1 = Charset()
|
||||
cset2 = Charset()
|
||||
eq(cset1, 'us-ascii')
|
||||
eq(cset1, 'US-ASCII')
|
||||
eq(cset1, 'Us-AsCiI')
|
||||
eq('us-ascii', cset1)
|
||||
eq('US-ASCII', cset1)
|
||||
eq('Us-AsCiI', cset1)
|
||||
ne(cset1, 'usascii')
|
||||
ne(cset1, 'USASCII')
|
||||
ne(cset1, 'UsAsCiI')
|
||||
ne('usascii', cset1)
|
||||
ne('USASCII', cset1)
|
||||
ne('UsAsCiI', cset1)
|
||||
eq(cset1, cset2)
|
||||
eq(cset2, cset1)
|
||||
|
||||
|
||||
|
||||
|
@ -983,8 +1256,12 @@ class TestIterators(TestEmailBase):
|
|||
eq = self.assertEqual
|
||||
msg = self._msgobj('msg_04.txt')
|
||||
it = Iterators.typed_subpart_iterator(msg, 'text')
|
||||
lines = [subpart.get_payload() for subpart in it]
|
||||
eq(len(lines), 2)
|
||||
lines = []
|
||||
subparts = 0
|
||||
for subpart in it:
|
||||
subparts += 1
|
||||
lines.append(subpart.get_payload())
|
||||
eq(subparts, 2)
|
||||
eq(EMPTYSTRING.join(lines), """\
|
||||
a simple kind of mirror
|
||||
to reflect upon our own
|
||||
|
@ -1011,6 +1288,7 @@ Do you like this message?
|
|||
-Me
|
||||
""")
|
||||
|
||||
|
||||
|
||||
class TestParsers(unittest.TestCase):
|
||||
def test_header_parser(self):
|
||||
|
@ -1025,6 +1303,274 @@ class TestParsers(unittest.TestCase):
|
|||
eq(msg.is_multipart(), 0)
|
||||
self.failUnless(isinstance(msg.get_payload(), StringType))
|
||||
|
||||
def test_whitespace_continuaton(self):
|
||||
eq = self.assertEqual
|
||||
# This message contains a line after the Subject: header that has only
|
||||
# whitespace, but it is not empty!
|
||||
msg = email.message_from_string("""\
|
||||
From: aperson@dom.ain
|
||||
To: bperson@dom.ain
|
||||
Subject: the next line has a space on it
|
||||
|
||||
Date: Mon, 8 Apr 2002 15:09:19 -0400
|
||||
Message-ID: spam
|
||||
|
||||
Here's the message body
|
||||
""")
|
||||
eq(msg['subject'], 'the next line has a space on it\n ')
|
||||
eq(msg['message-id'], 'spam')
|
||||
eq(msg.get_payload(), "Here's the message body\n")
|
||||
|
||||
|
||||
|
||||
class TestBase64(unittest.TestCase):
|
||||
def test_len(self):
    check = self.assertEqual
    measure = base64MIME.base64_len
    # The predicted length must match the length of a real encoding that
    # has no line ending appended.
    check(measure('hello'), len(base64MIME.encode('hello', eol='')))
    # Expected encoded length for inputs of 0..14 bytes: every started
    # group of three input bytes costs four output characters.
    expected = [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2
    for size, bsize in zip(range(15), expected):
        check(measure('x' * size), bsize)
|
||||
|
||||
def test_decode(self):
    check = self.assertEqual
    decode = base64MIME.decode
    check(decode(''), '')
    check(decode('aGVsbG8='), 'hello')
    # With a second argument and no line break in the decoded text, the
    # result is unchanged.
    check(decode('aGVsbG8=', 'X'), 'hello')
    # Here the line break inside the decoded text comes back as 'X'.
    check(decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld')
|
||||
|
||||
def test_encode(self):
    check = self.assertEqual
    encode = base64MIME.encode
    check(encode(''), '')
    check(encode('hello'), 'aGVsbG8=\n')
    # Test the binary flag: by default the newline is encoded as-is, with
    # the flag off it is encoded as CRLF.
    check(encode('hello\n'), 'aGVsbG8K\n')
    check(encode('hello\n', 0), 'aGVsbG8NCg==\n')
    text = 'xxxx ' * 20
    # Test the maxlinelen arg
    wrapped = encode(text, maxlinelen=40)
    check(wrapped, """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
    # Test the eol argument
    wrapped = encode(text, maxlinelen=40, eol='\r\n')
    check(wrapped, """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")
|
||||
|
||||
def test_header_encode(self):
|
||||
eq = self.assertEqual
|
||||
he = base64MIME.header_encode
|
||||
eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
|
||||
eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
|
||||
# Test the charset option
|
||||
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
|
||||
# Test the keep_eols flag
|
||||
eq(he('hello\nworld', keep_eols=1),
|
||||
'=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
|
||||
# Test the maxlinelen argument
|
||||
eq(he('xxxx ' * 20, maxlinelen=40), """\
|
||||
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=
|
||||
=?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=
|
||||
=?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=
|
||||
=?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=
|
||||
=?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=
|
||||
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
|
||||
# Test the eol argument
|
||||
eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
|
||||
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r
|
||||
=?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r
|
||||
=?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r
|
||||
=?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r
|
||||
=?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r
|
||||
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
|
||||
|
||||
|
||||
|
||||
class TestQuopri(unittest.TestCase):
|
||||
def setUp(self):
    # Build four character lists used by the tests of this class:
    #   hlit / hnon -- characters expected to stay literal / to need
    #                  quoting in header quoted-printable
    #   blit / bnon -- the same split for body quoted-printable
    every_char = [chr(code) for code in range(256)]
    header_safe = []
    for first, last in (('a', 'z'), ('A', 'Z'), ('0', '9')):
        header_safe.extend(
            [chr(code) for code in range(ord(first), ord(last) + 1)])
    header_safe.extend(['!', '*', '+', '-', '/', ' '])
    self.hlit = header_safe
    self.hnon = [c for c in every_char if c not in header_safe]
    # Together the two lists must cover every 8-bit character exactly once
    assert len(self.hlit) + len(self.hnon) == 256
    # Body-literal characters: space through tilde plus TAB, minus '='
    body_safe = [chr(code) for code in range(ord(' '), ord('~') + 1)]
    body_safe.append('\t')
    body_safe.remove('=')
    self.blit = body_safe
    self.bnon = [c for c in every_char if c not in body_safe]
    assert len(self.blit) + len(self.bnon) == 256
|
||||
|
||||
def test_header_quopri_check(self):
    # header_quopri_check() must be false for every header-literal
    # character and true for every other character.
    needs_quoting = quopriMIME.header_quopri_check
    for literal in self.hlit:
        self.failIf(needs_quoting(literal))
    for special in self.hnon:
        self.failUnless(needs_quoting(special))
|
||||
|
||||
def test_body_quopri_check(self):
    # body_quopri_check() must be false for every body-literal character
    # and true for every other character.
    needs_quoting = quopriMIME.body_quopri_check
    for literal in self.blit:
        self.failIf(needs_quoting(literal))
    for special in self.bnon:
        self.failUnless(needs_quoting(special))
|
||||
|
||||
def test_header_quopri_len(self):
    check = self.assertEqual
    measure = quopriMIME.header_quopri_len
    for sample in ('hello', 'h@e@l@l@o@'):
        predicted = measure(sample)
        # Empty charset and no line-endings.  7 == RFC chrome
        encoded = quopriMIME.header_encode(sample, charset='', eol='')
        check(predicted, len(encoded) - 7)
    # A literal character counts as one, a quoted character as three
    for literal in self.hlit:
        check(measure(literal), 1)
    for special in self.hnon:
        check(measure(special), 3)
|
||||
|
||||
def test_body_quopri_len(self):
    # In a body, a literal character counts as one and a quoted character
    # as three.
    check = self.assertEqual
    measure = quopriMIME.body_quopri_len
    for literal in self.blit:
        check(measure(literal), 1)
    for special in self.bnon:
        check(measure(special), 3)
|
||||
|
||||
def test_quote_unquote_idempotent(self):
    # Quoting and then unquoting must give back the original character,
    # for every 8-bit character value.
    for code in range(256):
        char = chr(code)
        quoted = quopriMIME.quote(char)
        self.assertEqual(quopriMIME.unquote(quoted), char)
|
||||
|
||||
def test_header_encode(self):
|
||||
eq = self.assertEqual
|
||||
he = quopriMIME.header_encode
|
||||
eq(he('hello'), '=?iso-8859-1?q?hello?=')
|
||||
eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=')
|
||||
# Test the charset option
|
||||
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
|
||||
# Test the keep_eols flag
|
||||
eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=')
|
||||
# Test a non-ASCII character
|
||||
eq(he('helloÇthere'), '=?iso-8859-1?q?hello=C7there?=')
|
||||
# Test the maxlinelen argument
|
||||
eq(he('xxxx ' * 20, maxlinelen=40), """\
|
||||
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
|
||||
=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
|
||||
=?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=
|
||||
=?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=
|
||||
=?iso-8859-1?q?x_xxxx_xxxx_?=""")
|
||||
# Test the eol argument
|
||||
eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
|
||||
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r
|
||||
=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r
|
||||
=?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r
|
||||
=?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r
|
||||
=?iso-8859-1?q?x_xxxx_xxxx_?=""")
|
||||
|
||||
def test_decode(self):
    check = self.assertEqual
    decode = quopriMIME.decode
    check(decode(''), '')
    check(decode('hello'), 'hello')
    # With a second argument and no line break in the text, the result is
    # unchanged.
    check(decode('hello', 'X'), 'hello')
    # Here the line break in the text comes back as 'X'.
    check(decode('hello\nworld', 'X'), 'helloXworld')
|
||||
|
||||
def test_encode(self):
|
||||
eq = self.assertEqual
|
||||
eq(quopriMIME.encode(''), '')
|
||||
eq(quopriMIME.encode('hello'), 'hello')
|
||||
# Test the binary flag
|
||||
eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld')
|
||||
eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld')
|
||||
# Test the maxlinelen arg
|
||||
eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\
|
||||
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
|
||||
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
|
||||
x xxxx xxxx xxxx xxxx=20""")
|
||||
# Test the eol argument
|
||||
eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
|
||||
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
|
||||
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
|
||||
x xxxx xxxx xxxx xxxx=20""")
|
||||
eq(quopriMIME.encode("""\
|
||||
one line
|
||||
|
||||
two line"""), """\
|
||||
one line
|
||||
|
||||
two line""")
|
||||
|
||||
|
||||
|
||||
# Test the Charset class
|
||||
class TestCharset(unittest.TestCase):
    def test_idempotent(self):
        # Make sure us-ascii = no Unicode conversion: a string pushed
        # through to_splittable() and back through from_splittable() must
        # come out unchanged.
        charset = Charset('us-ascii')
        samples = (
            'Hello World!',
            # test 8-bit idempotency with us-ascii
            '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa',
            )
        for original in samples:
            splittable = charset.to_splittable(original)
            self.assertEqual(original, charset.from_splittable(splittable))
|
||||
|
||||
|
||||
|
||||
# Test multilingual MIME headers.
|
||||
class TestHeader(unittest.TestCase):
|
||||
def test_simple(self):
    # A plain header encodes to itself; an appended chunk is joined to the
    # first one with a single space.
    check = self.assertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append('Goodbye World!')
    check(header.encode(), 'Hello World! Goodbye World!')
|
||||
|
||||
def test_header_needs_no_decoding(self):
    # A header without encoded words decodes to one (text, None) pair.
    plain = 'no decoding needed'
    decoded = decode_header(plain)
    self.assertEqual(decoded, [(plain, None)])
|
||||
|
||||
def test_long(self):
    # A long header must be folded so that no resulting line is longer
    # than the requested maximum line length.
    text = "I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse."
    header = Header(text, maxlinelen=76)
    # Folded lines are separated by a newline followed by a space
    for piece in header.encode().split('\n '):
        self.failUnless(len(piece) <= 76)
|
||||
|
||||
def test_multilingual(self):
|
||||
eq = self.assertEqual
|
||||
g = Charset("iso-8859-1")
|
||||
cz = Charset("iso-8859-2")
|
||||
utf8 = Charset("utf-8")
|
||||
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
|
||||
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
|
||||
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
|
||||
h = Header(g_head, g)
|
||||
h.append(cz_head, cz)
|
||||
h.append(utf8_head, utf8)
|
||||
enc = h.encode()
|
||||
eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
|
||||
=?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
|
||||
=?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
|
||||
=?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
|
||||
=?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
|
||||
=?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
|
||||
=?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
|
||||
=?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
|
||||
=?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
|
||||
=?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
|
||||
=?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
|
||||
=?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?=
|
||||
=?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?=
|
||||
=?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?=
|
||||
=?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
|
||||
eq(decode_header(enc),
|
||||
[(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
|
||||
(utf8_head, "utf-8")])
|
||||
|
||||
|
||||
|
||||
def suite():
|
||||
|
@ -1044,13 +1590,13 @@ def suite():
|
|||
suite.addTest(unittest.makeSuite(TestMiscellaneous))
|
||||
suite.addTest(unittest.makeSuite(TestIterators))
|
||||
suite.addTest(unittest.makeSuite(TestParsers))
|
||||
suite.addTest(unittest.makeSuite(TestBase64))
|
||||
suite.addTest(unittest.makeSuite(TestQuopri))
|
||||
suite.addTest(unittest.makeSuite(TestHeader))
|
||||
suite.addTest(unittest.makeSuite(TestCharset))
|
||||
return suite
|
||||
|
||||
|
||||
|
||||
def test_main():
|
||||
from test_support import run_suite
|
||||
run_suite(suite())
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_main()
|
||||
unittest.main(defaultTest='suite')
|
||||
|
|
51
Lib/test/test_email_codecs.py
Normal file
51
Lib/test/test_email_codecs.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
# Copyright (C) 2002 Python Software Foundation
|
||||
# email package unit tests for (optional) Asian codecs
|
||||
|
||||
import unittest
|
||||
from test_support import TestSkipped
|
||||
|
||||
from email.Charset import Charset
|
||||
from email.Header import Header, decode_header
|
||||
|
||||
|
||||
# See if we have the Japanese codecs package installed
|
||||
try:
    # Probe for the codec by decoding a throwaway string with it
    unicode('foo', 'japanese.iso-2022-jp')
except LookupError:
    # Unknown codec name: skip this whole test module
    raise TestSkipped('Optional Japanese codecs not installed')
|
||||
|
||||
|
||||
|
||||
class TestEmailAsianCodecs(unittest.TestCase):
    def test_japanese_codecs(self):
        # Headers built from euc-jp text must be emitted as iso-2022-jp
        # encoded words, both when mixed with other charsets and when a
        # single long chunk has to be split over several lines.
        eq = self.assertEqual
        j = Charset("euc-jp")
        g = Charset("iso-8859-1")
        h = Header("Hello World!")
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        h.append(jhello, j)
        h.append(ghello, g)
        eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
        eq(decode_header(h.encode()),
           [('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        # Named long_subject rather than "long" so that the builtin long
        # type is not shadowed inside this method.
        long_subject = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        h = Header(long_subject, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=')
        eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
|
||||
|
||||
|
||||
|
||||
def suite():
    # Return a TestSuite holding every test of TestEmailAsianCodecs.
    tests = unittest.TestSuite()
    codec_tests = unittest.makeSuite(TestEmailAsianCodecs)
    tests.addTest(codec_tests)
    return tests
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(defaultTest='suite')
|
Loading…
Add table
Add a link
Reference in a new issue