mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
#10790: make append work when output codec is different from input codec
There's still a bug here (the encode call shouldn't use the 'errors' parameter), but I'll fix that later.
This commit is contained in:
parent
ca1e7ec344
commit
477efb3944
4 changed files with 25 additions and 23 deletions
|
@ -94,14 +94,15 @@ Here is the :class:`Header` class description:
|
||||||
decoded with that character set.
|
decoded with that character set.
|
||||||
|
|
||||||
If *s* is an instance of :class:`str`, then *charset* is a hint specifying
|
If *s* is an instance of :class:`str`, then *charset* is a hint specifying
|
||||||
the character set of the characters in the string. In this case, when
|
the character set of the characters in the string.
|
||||||
producing an :rfc:`2822`\ -compliant header using :rfc:`2047` rules, the
|
|
||||||
Unicode string will be encoded using the following charsets in order:
|
|
||||||
``us-ascii``, the *charset* hint, ``utf-8``. The first character set to
|
|
||||||
not provoke a :exc:`UnicodeError` is used.
|
|
||||||
|
|
||||||
Optional *errors* is passed through to any :func:`encode` or
|
In either case, when producing an :rfc:`2822`\ -compliant header using
|
||||||
:func:`ustr.encode` call, and defaults to "strict".
|
:rfc:`2047` rules, the string will be encoded using the output codec of
|
||||||
|
the charset. If the string cannot be encoded using the output codec, a
|
||||||
|
UnicodeError will be raised.
|
||||||
|
|
||||||
|
Optional *errors* is passed as the errors argument to the decode call
|
||||||
|
if *s* is a byte string.
|
||||||
|
|
||||||
|
|
||||||
.. method:: encode(splitchars=';, \\t', maxlinelen=None, linesep='\\n')
|
.. method:: encode(splitchars=';, \\t', maxlinelen=None, linesep='\\n')
|
||||||
|
|
|
@ -245,32 +245,26 @@ class Header:
|
||||||
that byte string, and a UnicodeError will be raised if the string
|
that byte string, and a UnicodeError will be raised if the string
|
||||||
cannot be decoded with that charset. If s is a Unicode string, then
|
cannot be decoded with that charset. If s is a Unicode string, then
|
||||||
charset is a hint specifying the character set of the characters in
|
charset is a hint specifying the character set of the characters in
|
||||||
the string. In this case, when producing an RFC 2822 compliant header
|
the string. In either case, when producing an RFC 2822 compliant
|
||||||
using RFC 2047 rules, the Unicode string will be encoded using the
|
header using RFC 2047 rules, the string will be encoded using the
|
||||||
following charsets in order: us-ascii, the charset hint, utf-8. The
|
output codec of the charset. If the string cannot be encoded to the
|
||||||
first character set not to provoke a UnicodeError is used.
|
output codec, a UnicodeError will be raised.
|
||||||
|
|
||||||
Optional `errors' is passed as the third argument to any unicode() or
|
Optional `errors' is passed as the errors argument to the decode
|
||||||
ustr.encode() call.
|
call if s is a byte string.
|
||||||
"""
|
"""
|
||||||
if charset is None:
|
if charset is None:
|
||||||
charset = self._charset
|
charset = self._charset
|
||||||
elif not isinstance(charset, Charset):
|
elif not isinstance(charset, Charset):
|
||||||
charset = Charset(charset)
|
charset = Charset(charset)
|
||||||
if isinstance(s, str):
|
if not isinstance(s, str):
|
||||||
# Convert the string from the input character set to the output
|
|
||||||
# character set and store the resulting bytes and the charset for
|
|
||||||
# composition later.
|
|
||||||
input_charset = charset.input_codec or 'us-ascii'
|
input_charset = charset.input_codec or 'us-ascii'
|
||||||
input_bytes = s.encode(input_charset, errors)
|
s = s.decode(input_charset, errors)
|
||||||
else:
|
|
||||||
# We already have the bytes we will store internally.
|
|
||||||
input_bytes = s
|
|
||||||
# Ensure that the bytes we're storing can be decoded to the output
|
# Ensure that the bytes we're storing can be decoded to the output
|
||||||
# character set, otherwise an early error is thrown.
|
# character set, otherwise an early error is thrown.
|
||||||
output_charset = charset.output_codec or 'us-ascii'
|
output_charset = charset.output_codec or 'us-ascii'
|
||||||
output_string = input_bytes.decode(output_charset, errors)
|
s.encode(output_charset, errors)
|
||||||
self._chunks.append((output_string, charset))
|
self._chunks.append((s, charset))
|
||||||
|
|
||||||
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
|
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
|
||||||
"""Encode a message header into an RFC-compliant format.
|
"""Encode a message header into an RFC-compliant format.
|
||||||
|
|
|
@ -3620,6 +3620,10 @@ A very long line that must get split to something other than at the
|
||||||
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
|
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
|
||||||
raises(errors.HeaderParseError, decode_header, s)
|
raises(errors.HeaderParseError, decode_header, s)
|
||||||
|
|
||||||
|
def test_shift_jis_charset(self):
|
||||||
|
h = Header('文', charset='shift_jis')
|
||||||
|
self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Test RFC 2231 header parameters (en/de)coding
|
# Test RFC 2231 header parameters (en/de)coding
|
||||||
|
|
|
@ -30,6 +30,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #10790: email.header.Header.append's charset logic now works correctly
|
||||||
|
for charsets whose output codec is different from their input codec.
|
||||||
|
|
||||||
- Issue #10819: SocketIO.name property returns -1 when its closed, instead of
|
- Issue #10819: SocketIO.name property returns -1 when its closed, instead of
|
||||||
raising a ValueError, to fix repr().
|
raising a ValueError, to fix repr().
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue