_split_header(): The code here was terminally broken because it didn't know anything about RFC 2047 encoded headers. Fortunately, we have a perfectly good header splitter in Header.encode(), so we just call that to give us a properly formatted and split header. Header.encode() didn't know about "highest-level syntactic breaks", but that's been fixed now too.
2025-09-26 18:29:57 +00:00 · 2002-06-28 23:41:42 +00:00 · 2002-06-28 23:41:42 +00:00 · 062749ac57
commit 062749ac57
parent 7c75c99a10
1 changed files with 11 additions and 53 deletions
--- a/Lib/email/Generator.py
+++ b/Lib/email/Generator.py
@@ -11,6 +11,8 @@ import random
 from types import ListType, StringType
 from cStringIO import StringIO
 from email.Header import Header
 EMPTYSTRING = ''
 SEMISPACE = '; '
 BAR = '|'
@@ -149,17 +151,17 @@ class Generator:
            # headers.
            text = '%s: %s' % (h, v)
            if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
-                text = self._split_header(text)
+                text = self._split_header(h, text)
            print >> self._fp, text
        # A blank line always separates headers from body
        print >> self._fp
-    def _split_header(self, text):
+    def _split_header(self, name, text):
        maxheaderlen = self.__maxheaderlen
        # Find out whether any lines in the header are really longer than
        # maxheaderlen characters wide.  There could be continuation lines
        # that actually shorten it.  Also, replace hard tabs with 8 spaces.
-        lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
+        lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
        for line in lines:
            if len(line) > maxheaderlen:
                break
@@ -167,56 +169,12 @@ class Generator:
            # No line was actually longer than maxheaderlen characters, so
            # just return the original unchanged.
            return text
-        rtn = []
+        # The `text' argument already has the field name prepended, so don't
-        for line in text.split('\n'):
+        # provide it here or the first line will get folded too short.
-            splitline = []
+        h = Header(text, maxlinelen=maxheaderlen,
-            # Short lines can remain unchanged
+                   # For backwards compatibility, we use a hard tab here
-            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
+                   continuation_ws='\t')
-                splitline.append(line)
+        return h.encode()
                rtn.append(SEMINLTAB.join(splitline))
            else:
                oldlen = len(line)
                # Try to break the line on semicolons, but if that doesn't
                # work, try to split on folding whitespace.
                while len(line) > maxheaderlen:
                    i = line.rfind(';', 0, maxheaderlen)
                    if i < 0:
                        break
                    splitline.append(line[:i])
                    line = line[i+1:].lstrip()
                if len(line) <> oldlen:
                    # Splitting on semis worked
                    splitline.append(line)
                    rtn.append(SEMINLTAB.join(splitline))
                    continue
                # Splitting on semis didn't help, so try to split on
                # whitespace.
                parts = re.split(r'(\s+)', line)
                # Watch out though for "Header: longnonsplittableline"
                if parts[0].endswith(':') and len(parts) == 3:
                    rtn.append(line)
                    continue
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
                while parts:
                    len0 = len(parts[0])
                    len1 = len(parts[1])
                    if acc + len0 + len1 < maxheaderlen:
                        sublines.append(parts.pop(0))
                        sublines.append(parts.pop(0))
                        acc += len0 + len1
                    else:
                        # Split it here, but don't forget to ignore the
                        # next whitespace-only part
                        splitline.append(EMPTYSTRING.join(sublines))
                        del parts[0]
                        first = parts.pop(0)
                        sublines = [first]
                        acc = len(first)
                splitline.append(EMPTYSTRING.join(sublines))
                rtn.append(NLTAB.join(splitline))
        return NL.join(rtn)
    #
    # Handlers for writing types and subtypes