mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
#18891: Complete new provisional email API.
This adds EmailMessage and MIMEPart, subclasses of Message with new API methods, and a ContentManager class used by the new methods. It also adds a new policy setting, content_manager. The patch was reviewed by Stephen J. Turnbull and Serhiy Storchaka, and reflects their feedback. Ideally, I will add some examples of using the new API to the documentation before the final release.
This commit is contained in:
parent
1a16288197
commit
3da240fd01
15 changed files with 2539 additions and 26 deletions
249
Lib/email/contentmanager.py
Normal file
249
Lib/email/contentmanager.py
Normal file
|
@ -0,0 +1,249 @@
|
|||
import binascii
|
||||
import email.charset
|
||||
import email.message
|
||||
import email.errors
|
||||
from email import quoprimime
|
||||
|
||||
class ContentManager:
    """Registry of handlers used to read and write message content.

    Get handlers are looked up by the message's content type; set handlers
    are looked up by the type of the object being stored.
    """

    def __init__(self):
        # Keys: full content type, main type, or '' as the catch-all.
        self.get_handlers = {}
        # Keys: type object, dotted/qualified/plain type name, or None as
        # the catch-all.
        self.set_handlers = {}

    def add_get_handler(self, key, handler):
        """Register *handler* to be used by get_content for *key*."""
        self.get_handlers[key] = handler

    def get_content(self, msg, *args, **kw):
        """Return the result of the best matching get handler for *msg*.

        The handler is selected by trying, in order, the full content type,
        the main type, and the empty string.  The handler is called with
        *msg* followed by any extra positional and keyword arguments.
        Raise KeyError (carrying the content type) if nothing matches.
        """
        registry = self.get_handlers
        content_type = msg.get_content_type()
        if content_type in registry:
            chosen = content_type
        else:
            maintype = msg.get_content_maintype()
            if maintype in registry:
                chosen = maintype
            elif '' in registry:
                chosen = ''
            else:
                raise KeyError(content_type)
        return registry[chosen](msg, *args, **kw)

    def add_set_handler(self, typekey, handler):
        """Register *handler* to be used by set_content for *typekey*."""
        self.set_handlers[typekey] = handler

    def set_content(self, msg, obj, *args, **kw):
        """Store *obj* as the content of *msg* using a registered handler.

        Raise TypeError if *msg* is a multipart.  The handler is located
        before the existing content is cleared, so a failed lookup leaves
        *msg* untouched.
        """
        if msg.get_content_maintype() == 'multipart':
            # XXX: is this error a good idea or not?  We can remove it later,
            # but we can't add it later, so do it for now.
            raise TypeError("set_content not valid on multipart")
        handler = self._find_set_handler(msg, obj)
        msg.clear_content()
        handler(msg, obj, *args, **kw)

    def _find_set_handler(self, msg, obj):
        """Return the set handler registered for the type of *obj*.

        Walk the MRO of type(obj); for each class try the class object, its
        'module.qualname' path, its qualname, and its plain name.  Fall back
        to the handler registered under None.  Raise KeyError carrying the
        dotted path of the most derived class when nothing matches.
        """
        registry = self.set_handlers
        first_full_path = None
        for klass in type(obj).__mro__:
            if klass in registry:
                return registry[klass]
            qualname = klass.__qualname__
            module = getattr(klass, '__module__', '')
            dotted = '.'.join((module, qualname)) if module else qualname
            if first_full_path is None:
                # Remember the most derived class for the error message.
                first_full_path = dotted
            for key in (dotted, qualname, klass.__name__):
                if key in registry:
                    return registry[key]
        if None in registry:
            return registry[None]
        raise KeyError(first_full_path)
|
||||
|
||||
|
||||
raw_data_manager = ContentManager()
|
||||
|
||||
|
||||
def get_text_content(msg, errors='replace'):
    """Return the payload of *msg* decoded to a str.

    The transfer-decoded payload bytes are decoded with the charset named
    by the message's 'charset' parameter ('ASCII' when absent), using
    *errors* as the codec error handler.
    """
    raw = msg.get_payload(decode=True)
    codec = msg.get_param('charset', 'ASCII')
    return raw.decode(codec, errors=errors)
|
||||
raw_data_manager.add_get_handler('text', get_text_content)
|
||||
|
||||
|
||||
def get_non_text_content(msg):
    """Return the transfer-decoded payload of *msg* as returned by
    msg.get_payload(decode=True)."""
    decoded = msg.get_payload(decode=True)
    return decoded
|
||||
for maintype in 'audio image video application'.split():
|
||||
raw_data_manager.add_get_handler(maintype, get_non_text_content)
|
||||
|
||||
|
||||
def get_message_content(msg):
    """Return the first (index 0) item of the payload of *msg*."""
    embedded = msg.get_payload(0)
    return embedded
|
||||
for subtype in 'rfc822 external-body'.split():
|
||||
raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
|
||||
|
||||
|
||||
def get_and_fixup_unknown_message_content(msg):
    """Return the first payload item of *msg* converted to bytes.

    If we don't understand a message subtype, we are supposed to treat it as
    if it were application/octet-stream, per
    tools.ietf.org/html/rfc2046#section-5.2.4.  Feedparser doesn't do that,
    so do our best to fix things up.  Note that it is *not* appropriate to
    model message/partial content as Message objects, so they are handled
    here as well.  (How to reassemble them is out of scope for this comment.)
    """
    embedded = msg.get_payload(0)
    return bytes(embedded)
|
||||
raw_data_manager.add_get_handler('message',
|
||||
get_and_fixup_unknown_message_content)
|
||||
|
||||
|
||||
def _prepare_set(msg, maintype, subtype, headers):
|
||||
msg['Content-Type'] = '/'.join((maintype, subtype))
|
||||
if headers:
|
||||
if not hasattr(headers[0], 'name'):
|
||||
mp = msg.policy
|
||||
headers = [mp.header_factory(*mp.header_source_parse([header]))
|
||||
for header in headers]
|
||||
try:
|
||||
for header in headers:
|
||||
if header.defects:
|
||||
raise header.defects[0]
|
||||
msg[header.name] = header
|
||||
except email.errors.HeaderDefect as exc:
|
||||
raise ValueError("Invalid header: {}".format(
|
||||
header.fold(policy=msg.policy))) from exc
|
||||
|
||||
|
||||
def _finalize_set(msg, disposition, filename, cid, params):
|
||||
if disposition is None and filename is not None:
|
||||
disposition = 'attachment'
|
||||
if disposition is not None:
|
||||
msg['Content-Disposition'] = disposition
|
||||
if filename is not None:
|
||||
msg.set_param('filename',
|
||||
filename,
|
||||
header='Content-Disposition',
|
||||
replace=True)
|
||||
if cid is not None:
|
||||
msg['Content-ID'] = cid
|
||||
if params is not None:
|
||||
for key, value in params.items():
|
||||
msg.set_param(key, value)
|
||||
|
||||
|
||||
# XXX: This is a cleaned-up version of base64mime.body_encode. It would
|
||||
# be nice to drop both this and quoprimime.body_encode in favor of
|
||||
# enhanced binascii routines that accepted a max_line_length parameter.
|
||||
def _encode_base64(data, max_line_length):
|
||||
encoded_lines = []
|
||||
unencoded_bytes_per_line = max_line_length * 3 // 4
|
||||
for i in range(0, len(data), unencoded_bytes_per_line):
|
||||
thisline = data[i:i+unencoded_bytes_per_line]
|
||||
encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
|
||||
return ''.join(encoded_lines)
|
||||
|
||||
|
||||
def _encode_text(string, charset, cte, policy):
|
||||
lines = string.encode(charset).splitlines()
|
||||
linesep = policy.linesep.encode('ascii')
|
||||
def embeded_body(lines): return linesep.join(lines) + linesep
|
||||
def normal_body(lines): return b'\n'.join(lines) + b'\n'
|
||||
if cte==None:
|
||||
# Use heuristics to decide on the "best" encoding.
|
||||
try:
|
||||
return '7bit', normal_body(lines).decode('ascii')
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
if (policy.cte_type == '8bit' and
|
||||
max(len(x) for x in lines) <= policy.max_line_length):
|
||||
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
|
||||
sniff = embeded_body(lines[:10])
|
||||
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
|
||||
policy.max_line_length)
|
||||
sniff_base64 = binascii.b2a_base64(sniff)
|
||||
# This is a little unfair to qp; it includes lineseps, base64 doesn't.
|
||||
if len(sniff_qp) > len(sniff_base64):
|
||||
cte = 'base64'
|
||||
else:
|
||||
cte = 'quoted-printable'
|
||||
if len(lines) <= 10:
|
||||
return cte, sniff_qp
|
||||
if cte == '7bit':
|
||||
data = normal_body(lines).decode('ascii')
|
||||
elif cte == '8bit':
|
||||
data = normal_body(lines).decode('ascii', 'surrogateescape')
|
||||
elif cte == 'quoted-printable':
|
||||
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
|
||||
policy.max_line_length)
|
||||
elif cte == 'base64':
|
||||
data = _encode_base64(embeded_body(lines), policy.max_line_length)
|
||||
else:
|
||||
raise ValueError("Unknown content transfer encoding {}".format(cte))
|
||||
return cte, data
|
||||
|
||||
|
||||
def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
                     disposition=None, filename=None, cid=None,
                     params=None, headers=None):
    """Store *string* in *msg* as text/*subtype* content.

    The text is encoded by _encode_text, which also picks the content
    transfer encoding when *cte* is None.  The 'charset' parameter is set
    to the name *charset* maps to in email.charset.ALIASES, or to *charset*
    itself when it has no alias.
    """
    _prepare_set(msg, 'text', subtype, headers)
    encoding, body = _encode_text(string, charset, cte, msg.policy)
    msg.set_payload(body)
    canonical = email.charset.ALIASES.get(charset, charset)
    msg.set_param('charset', canonical, replace=True)
    msg['Content-Transfer-Encoding'] = encoding
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
raw_data_manager.add_set_handler(str, set_text_content)
|
||||
|
||||
|
||||
def set_message_content(msg, message, subtype="rfc822", cte=None,
                        disposition=None, filename=None, cid=None,
                        params=None, headers=None):
    """Store *message* in *msg* as message/*subtype* content.

    Raise ValueError for subtype 'partial', and for a *cte* the subtype
    does not permit.  The payload is set to a one-element list holding
    *message*.
    """
    if subtype == 'partial':
        raise ValueError("message/partial is not supported for Message objects")
    if subtype == 'rfc822':
        if cte not in (None, '7bit', '8bit', 'binary'):
            # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
            raise ValueError(
                "message/rfc822 parts do not support cte={}".format(cte))
        # 8bit will get coerced on serialization if policy.cte_type='7bit'.
        # We may end up claiming 8bit when it isn't needed, but the only
        # negative result of that should be a gateway that needs to coerce
        # to 7bit having to look through the whole embedded message to
        # discover whether or not it actually has to do anything.
        if cte is None:
            cte = '8bit'
    elif subtype == 'external-body':
        if cte not in (None, '7bit'):
            # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
            raise ValueError(
                "message/external-body parts do not support cte={}".format(cte))
        cte = '7bit'
    elif cte is None:
        # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
        # subtypes should be restricted to 7bit, so assume that.
        cte = '7bit'
    _prepare_set(msg, 'message', subtype, headers)
    msg.set_payload([message])
    msg['Content-Transfer-Encoding'] = cte
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
raw_data_manager.add_set_handler(email.message.Message, set_message_content)
|
||||
|
||||
|
||||
def set_bytes_content(msg, data, maintype, subtype, cte='base64',
                      disposition=None, filename=None, cid=None,
                      params=None, headers=None):
    """Store the bytes-like *data* in *msg* as *maintype*/*subtype* content.

    *cte* selects how *data* is turned into the str payload:

      'base64'            wrapped base64 (the default)
      'quoted-printable'  binascii quoted-printable, lines not wrapped
      '7bit'              *data* must be pure ASCII; UnicodeDecodeError
                          (a ValueError) is raised otherwise
      '8bit', 'binary'    ASCII with non-ASCII bytes surrogate-escaped

    Any other *cte* leaves *data* unconverted.
    """
    _prepare_set(msg, maintype, subtype, headers)
    if cte == 'base64':
        data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
    elif cte == 'quoted-printable':
        # XXX: quoprimime.body_encode won't encode newline characters in data,
        # so we can't use it.  This means max_line_length is ignored.  Another
        # bug to fix later.  (Note: encoders.quopri is broken on line ends.)
        data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
        data = data.decode('ascii')
    elif cte == '7bit':
        # Make sure it really is only ASCII.  The early warning here seems
        # worth the overhead...if you care write your own content manager :).
        # bytes() is needed because memoryview has no decode method.
        data = bytes(data).decode('ascii')
    elif cte in ('8bit', 'binary'):
        data = bytes(data).decode('ascii', 'surrogateescape')
    msg.set_payload(data)
    msg['Content-Transfer-Encoding'] = cte
    _finalize_set(msg, disposition, filename, cid, params)
|
||||
for typ in (bytes, bytearray, memoryview):
|
||||
raw_data_manager.add_set_handler(typ, set_bytes_content)
|
|
@ -8,8 +8,6 @@ __all__ = ['Message']
|
|||
|
||||
import re
|
||||
import uu
|
||||
import base64
|
||||
import binascii
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
# Intrapackage imports
|
||||
|
@ -679,7 +677,7 @@ class Message:
|
|||
return failobj
|
||||
|
||||
def set_param(self, param, value, header='Content-Type', requote=True,
|
||||
charset=None, language=''):
|
||||
charset=None, language='', replace=False):
|
||||
"""Set a parameter in the Content-Type header.
|
||||
|
||||
If the parameter already exists in the header, its value will be
|
||||
|
@ -723,8 +721,11 @@ class Message:
|
|||
else:
|
||||
ctype = SEMISPACE.join([ctype, append_param])
|
||||
if ctype != self.get(header):
|
||||
del self[header]
|
||||
self[header] = ctype
|
||||
if replace:
|
||||
self.replace_header(header, ctype)
|
||||
else:
|
||||
del self[header]
|
||||
self[header] = ctype
|
||||
|
||||
def del_param(self, param, header='content-type', requote=True):
|
||||
"""Remove the given parameter completely from the Content-Type header.
|
||||
|
@ -905,3 +906,208 @@ class Message:
|
|||
|
||||
# I.e. def walk(self): ...
|
||||
from email.iterators import walk
|
||||
|
||||
|
||||
class MIMEPart(Message):
    """Message subclass providing the content-oriented convenience API.

    It adds body/attachment discovery (get_body, iter_attachments,
    iter_parts), content access through the policy's content manager
    (get_content, set_content), and helpers for building multipart
    structures (make_* and add_*).
    """

    def __init__(self, policy=None):
        """Create an empty part using *policy* (email.policy.default if None)."""
        if policy is None:
            # Imported at call time rather than at module level.
            # NOTE(review): presumably to avoid an import cycle between the
            # message and policy modules -- confirm.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    @property
    def is_attachment(self):
        """True if the Content-Disposition value is 'attachment'.

        Only the disposition itself is compared, so a header such as
        'attachment; filename="x"' counts as an attachment.
        """
        c_d = self.get('content-disposition')
        if c_d is None:
            return False
        disposition = str(c_d).split(';', 1)[0]
        return disposition.strip().lower() == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates under *part*.

        priority is the index of the matching entry in *preferencelist*.
        Attachments, and multiparts whose payload is not a list, yield
        nothing.
        """
        if part.is_attachment:
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # A malformed message may claim multipart/* yet carry a non-list
        # payload; there is nothing to descend into in that case.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Look inside the related: prefer the subpart named by 'start',
        # otherwise use the first subpart as the root.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.

        The message's payload list is not modified.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # Declared multipart but the payload is not a list of parts.
            return
        # Work on a copy so iterating never alters the message itself.
        parts = list(payload)
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text relateds or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and for a part that
        declares a multipart content type but whose payload is not a list.
        """
        if self.get_content_maintype() == 'multipart' and self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return this part's content via *content_manager*.

        The policy's content_manager is used when none is given; remaining
        arguments are passed through to its get_content method.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set this part's content via *content_manager*.

        The policy's content_manager is used when none is given; remaining
        arguments are passed through to its set_content method.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Convert this part in place into a multipart/*subtype*.

        Raise ValueError if the part is already a multipart whose subtype is
        *subtype* or one of *disallowed_subtypes*.  Existing Content-*
        headers and the payload are moved into a new first subpart; all
        other headers stay on this part.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert this part into a multipart/related."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert this part into a multipart/alternative."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert this part into a multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart whose content is set from *args* and *kw*.

        This part is first converted to multipart/*_subtype* if it is not
        one already.  *_disp*, when given, becomes the new subpart's
        Content-Disposition unless set_content already supplied one.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a new 'inline' subpart, making this a multipart/related."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a new subpart, making this a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a new 'attachment' subpart, making this a multipart/mixed."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with
        'content-' (case-insensitive); keep all other headers."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
|
||||
|
||||
|
||||
class EmailMessage(MIMEPart):
    """MIMEPart variant that also maintains the MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content, then add 'MIME-Version: 1.0' if it is missing."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'
|
||||
|
|
|
@ -5,6 +5,7 @@ code that adds all the email6 features.
|
|||
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
|
||||
from email.utils import _has_surrogates
|
||||
from email.headerregistry import HeaderRegistry as HeaderRegistry
|
||||
from email.contentmanager import raw_data_manager
|
||||
|
||||
__all__ = [
|
||||
'Compat32',
|
||||
|
@ -58,10 +59,22 @@ class EmailPolicy(Policy):
|
|||
special treatment, while all other fields are
|
||||
treated as unstructured. This list will be
|
||||
completed before the extension is marked stable.)
|
||||
|
||||
content_manager -- an object with at least two methods: get_content
|
||||
and set_content. When the get_content or
|
||||
set_content method of a Message object is called,
|
||||
it calls the corresponding method of this object,
|
||||
passing it the message object as its first argument,
|
||||
and any arguments or keywords that were passed to
|
||||
it as additional arguments. The default
|
||||
content_manager is
|
||||
:data:`~email.contentmanager.raw_data_manager`.
|
||||
|
||||
"""
|
||||
|
||||
refold_source = 'long'
|
||||
header_factory = HeaderRegistry()
|
||||
content_manager = raw_data_manager
|
||||
|
||||
def __init__(self, **kw):
|
||||
# Ensure that each new instance gets a unique header factory
|
||||
|
|
|
@ -68,9 +68,13 @@ def _has_surrogates(s):
|
|||
# How to deal with a string containing bytes before handing it to the
|
||||
# application through the 'normal' interface.
|
||||
def _sanitize(string):
|
||||
# Turn any escaped bytes into unicode 'unknown' char.
|
||||
original_bytes = string.encode('ascii', 'surrogateescape')
|
||||
return original_bytes.decode('ascii', 'replace')
|
||||
# Turn any escaped bytes into unicode 'unknown' char. If the escaped
|
||||
# bytes happen to be utf-8 they will instead get decoded, even if they
|
||||
# were invalid in the charset the source was supposed to be in. This
|
||||
# seems like it is not a bad thing; a defect was still registered.
|
||||
original_bytes = string.encode('utf-8', 'surrogateescape')
|
||||
return original_bytes.decode('utf-8', 'replace')
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue