mirror of
https://github.com/python/cpython.git
synced 2025-10-14 18:59:46 +00:00
Merge #14291: if a header has non-ascii unicode, default to CTE using utf-8
In Python 2, if a unicode string was assigned as the value of a header, email would automatically CTE-encode it using the UTF-8 charset. This capability was lost in the Python 3 translation, and this patch restores it. Patch by Ali Ikinci, assisted by R. David Murray. I also added a fix for the mailbox test that depended (with a comment noting that doing so was a bad idea) on non-ASCII input causing message_from_string to raise an error. It now uses support.patch to induce an error during message serialization.
This commit is contained in:
commit
e2922835b0
5 changed files with 33 additions and 7 deletions
|
@ -283,7 +283,12 @@ class Header:
|
||||||
# character set, otherwise an early error is thrown.
|
# character set, otherwise an early error is thrown.
|
||||||
output_charset = charset.output_codec or 'us-ascii'
|
output_charset = charset.output_codec or 'us-ascii'
|
||||||
if output_charset != _charset.UNKNOWN8BIT:
|
if output_charset != _charset.UNKNOWN8BIT:
|
||||||
s.encode(output_charset, errors)
|
try:
|
||||||
|
s.encode(output_charset, errors)
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
if output_charset!='us-ascii':
|
||||||
|
raise
|
||||||
|
charset = UTF8
|
||||||
self._chunks.append((s, charset))
|
self._chunks.append((s, charset))
|
||||||
|
|
||||||
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
|
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
|
||||||
|
|
|
@ -604,6 +604,19 @@ class TestMessageAPI(TestEmailBase):
|
||||||
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
|
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
|
||||||
self.assertRaises(errors.HeaderParseError, msg.as_string)
|
self.assertRaises(errors.HeaderParseError, msg.as_string)
|
||||||
|
|
||||||
|
def test_unicode_header_defaults_to_utf8_encoding(self):
|
||||||
|
# Issue 14291
|
||||||
|
m = MIMEText('abc\n')
|
||||||
|
m['Subject'] = 'É test'
|
||||||
|
self.assertEqual(str(m),textwrap.dedent("""\
|
||||||
|
Content-Type: text/plain; charset="us-ascii"
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Transfer-Encoding: 7bit
|
||||||
|
Subject: =?utf-8?q?=C3=89_test?=
|
||||||
|
|
||||||
|
abc
|
||||||
|
"""))
|
||||||
|
|
||||||
# Test the email.encoders module
|
# Test the email.encoders module
|
||||||
class TestEncoders(unittest.TestCase):
|
class TestEncoders(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -1045,9 +1058,13 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-W
|
||||||
'f\xfcr Offshore-Windkraftprojekte '
|
'f\xfcr Offshore-Windkraftprojekte '
|
||||||
'<a-very-long-address@example.com>')
|
'<a-very-long-address@example.com>')
|
||||||
msg['Reply-To'] = header_string
|
msg['Reply-To'] = header_string
|
||||||
self.assertRaises(UnicodeEncodeError, msg.as_string)
|
eq(msg.as_string(maxheaderlen=78), """\
|
||||||
|
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
|
||||||
|
=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
|
||||||
|
|
||||||
|
""")
|
||||||
msg = Message()
|
msg = Message()
|
||||||
msg['Reply-To'] = Header(header_string, 'utf-8',
|
msg['Reply-To'] = Header(header_string,
|
||||||
header_name='Reply-To')
|
header_name='Reply-To')
|
||||||
eq(msg.as_string(maxheaderlen=78), """\
|
eq(msg.as_string(maxheaderlen=78), """\
|
||||||
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
|
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
|
||||||
|
|
|
@ -111,10 +111,10 @@ class TestMailbox(TestBase):
|
||||||
self.assertMailboxEmpty()
|
self.assertMailboxEmpty()
|
||||||
|
|
||||||
def test_add_that_raises_leaves_mailbox_empty(self):
|
def test_add_that_raises_leaves_mailbox_empty(self):
|
||||||
# XXX This test will start failing when Message learns to handle
|
def raiser(*args, **kw):
|
||||||
# non-ASCII string headers, and a different internal failure will
|
raise Exception("a fake error")
|
||||||
# need to be found or manufactured.
|
support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(Exception):
|
||||||
self._box.add(email.message_from_string("From: Alphöso"))
|
self._box.add(email.message_from_string("From: Alphöso"))
|
||||||
self.assertEqual(len(self._box), 0)
|
self.assertEqual(len(self._box), 0)
|
||||||
self._box.close()
|
self._box.close()
|
||||||
|
|
|
@ -470,6 +470,7 @@ Gerhard Häring
|
||||||
Fredrik Håård
|
Fredrik Håård
|
||||||
Catalin Iacob
|
Catalin Iacob
|
||||||
Mihai Ibanescu
|
Mihai Ibanescu
|
||||||
|
Ali Ikinci
|
||||||
Lars Immisch
|
Lars Immisch
|
||||||
Bobby Impollonia
|
Bobby Impollonia
|
||||||
Meador Inge
|
Meador Inge
|
||||||
|
|
|
@ -24,6 +24,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
|
||||||
|
instead of raising an error. This fixes a regression relative to 2.7.
|
||||||
|
|
||||||
- Issue #989712: Support using Tk without a mainloop.
|
- Issue #989712: Support using Tk without a mainloop.
|
||||||
|
|
||||||
- Issue #5219: Prevent event handler cascade in IDLE.
|
- Issue #5219: Prevent event handler cascade in IDLE.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue