mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
#10686: recode non-ASCII headers to 'unknown-8bit' instead of ?s.
This applies only when generating strings from non-RFC compliant binary input; it makes the existing recoding behavior more consistent (ie: now no data is lost when recoding).
This commit is contained in:
parent
6f0022d84a
commit
9253214fd9
9 changed files with 109 additions and 62 deletions
|
@ -17,7 +17,8 @@ import email.quoprimime
|
|||
import email.base64mime
|
||||
|
||||
from email.errors import HeaderParseError
|
||||
from email.charset import Charset
|
||||
from email import charset as _charset
|
||||
Charset = _charset.Charset
|
||||
|
||||
NL = '\n'
|
||||
SPACE = ' '
|
||||
|
@ -210,6 +211,9 @@ class Header:
|
|||
# from a charset to None/us-ascii, or from None/us-ascii to a
|
||||
# charset. Only do this for the second and subsequent chunks.
|
||||
nextcs = charset
|
||||
if nextcs == _charset.UNKNOWN8BIT:
|
||||
original_bytes = string.encode('ascii', 'surrogateescape')
|
||||
string = original_bytes.decode('ascii', 'replace')
|
||||
if uchunks:
|
||||
if lastcs not in (None, 'us-ascii'):
|
||||
if nextcs in (None, 'us-ascii'):
|
||||
|
@ -263,7 +267,8 @@ class Header:
|
|||
# Ensure that the bytes we're storing can be decoded to the output
|
||||
# character set, otherwise an early error is thrown.
|
||||
output_charset = charset.output_codec or 'us-ascii'
|
||||
s.encode(output_charset, errors)
|
||||
if output_charset != _charset.UNKNOWN8BIT:
|
||||
s.encode(output_charset, errors)
|
||||
self._chunks.append((s, charset))
|
||||
|
||||
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue