#10686: recode non-ASCII headers to 'unknown-8bit' instead of ?s.

This applies only when generating strings from non-RFC-compliant binary
input; it makes the existing recoding behavior more consistent (i.e.,
no data is now lost when recoding).
This commit is contained in:
R. David Murray 2011-01-07 23:25:30 +00:00
parent 6f0022d84a
commit 9253214fd9
9 changed files with 109 additions and 62 deletions

View file

@@ -17,7 +17,8 @@ import email.quoprimime
import email.base64mime
from email.errors import HeaderParseError
from email.charset import Charset
from email import charset as _charset
Charset = _charset.Charset
NL = '\n'
SPACE = ' '
@@ -210,6 +211,9 @@ class Header:
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
nextcs = charset
if nextcs == _charset.UNKNOWN8BIT:
original_bytes = string.encode('ascii', 'surrogateescape')
string = original_bytes.decode('ascii', 'replace')
if uchunks:
if lastcs not in (None, 'us-ascii'):
if nextcs in (None, 'us-ascii'):
@@ -263,7 +267,8 @@ class Header:
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
s.encode(output_charset, errors)
if output_charset != _charset.UNKNOWN8BIT:
s.encode(output_charset, errors)
self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):