#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements in control over the parser/generator without requiring any additional complexity in the parser/generator API. Patch reviewed by Éric Araujo and Barry Warsaw.
2025-12-15 21:44:50 +00:00 · 2011-04-18 13:59:37 -04:00 · 2011-04-18 13:59:37 -04:00 · 3edd22ac95
commit 3edd22ac95
parent ce16be91dc
13 changed files with 912 additions and 81 deletions
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@ -13,8 +13,10 @@ import random
 import warnings

 from io import StringIO, BytesIO
+from email import policy
 from email.header import Header
 from email.message import _has_surrogates
+import email.charset as _charset

 UNDERSCORE = '_'
 NL = '\n'  # XXX: no longer used by the code below.
@ -33,7 +35,8 @@ class Generator:
    # Public interface
    #

-    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+                 policy=policy.default):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
@ -49,16 +52,23 @@ class Generator:
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the generator's operation.  The default policy maintains
+        backward compatibility.
+
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
-        self._maxheaderlen = maxheaderlen
+        self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else
+                                 policy.max_line_length)
+        self.policy = policy

    def write(self, s):
        # Just delegate to the file object
        self._fp.write(s)

-    def flatten(self, msg, unixfrom=False, linesep='\n'):
+    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

@ -70,17 +80,15 @@ class Generator:
        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
-        the output.  The default value is the most useful for typical
-        Python applications, but it can be set to \r\n to produce RFC-compliant
-        line separators when needed.
+        the output.  The default value is determined by the policy.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
-        self._NL = linesep
-        self._encoded_NL = self._encode(linesep)
+        self._NL = linesep if linesep is not None else self.policy.linesep
+        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMTPY = self._encode('')
        if unixfrom:
@ -338,7 +346,10 @@ class BytesGenerator(Generator):

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
-    bytes, these are decoded back to bytes for output.
+    bytes, these are decoded back to bytes for output.  If the policy has
+    must_be_7bit set true, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the
+    charset unknown-8bit.

    The outfp object must accept bytes in its write method.
    """
@ -361,21 +372,22 @@ class BytesGenerator(Generator):
        # strings with 8bit bytes.
        for h, v in msg._headers:
            self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
-            elif _has_surrogates(v):
-                # If we have raw 8bit data in a byte string, we have no idea
-                # what the encoding is.  There is no safe way to split this
-                # string.  If it's ascii-subset, then we could do a normal
-                # ascii split, but if it's multibyte then we could break the
-                # string.  There's no way to know so the least harm seems to
-                # be to not split the string and risk it being too long.
-                self.write(v+NL)
-            else:
-                # Header's got lots of smarts and this string is safe...
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+            if isinstance(v, str):
+                if _has_surrogates(v):
+                    if not self.policy.must_be_7bit:
+                        # If we have raw 8bit data in a byte string, we have no idea
+                        # what the encoding is.  There is no safe way to split this
+                        # string.  If it's ascii-subset, then we could do a normal
+                        # ascii split, but if it's multibyte then we could break the
+                        # string.  There's no way to know so the least harm seems to
+                        # be to not split the string and risk it being too long.
+                        self.write(v+NL)
+                        continue
+                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
+                else:
+                    h = Header(v, header_name=h)
+            self.write(h.encode(linesep=self._NL,
+                                maxlinelen=self._maxheaderlen)+self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

@ -384,7 +396,7 @@ class BytesGenerator(Generator):
        # just write it back out.
        if msg._payload is None:
            return
-        if _has_surrogates(msg._payload):
+        if _has_surrogates(msg._payload) and not self.policy.must_be_7bit:
            self.write(msg._payload)
        else:
            super(BytesGenerator,self)._handle_text(msg)