gh-113594: Fix UnicodeEncodeError in TokenList.fold() (GH-113730)

It occurred when trying to re-encode an unknown-8bit part combined with a non-unknown-8bit part.
This commit is contained in:
Serhiy Storchaka 2024-01-10 14:54:36 +02:00 committed by GitHub
parent 568d220993
commit e9d5b6ea2d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 0 deletions

View file

@ -2915,6 +2915,45 @@ class TestFolding(TestEmailBase):
"mich. And that's\n"
" all I'm sayin.\n")
def test_unicode_after_unknown_not_combined(self):
    # An unknown-8bit encoded word directly followed by a raw non-ASCII
    # character: the expected output keeps the unknown-8bit word as it
    # is and carries the character in its own utf-8 encoded word.
    unfolded = "=?unknown-8bit?q?=A4?=\xa4"
    expected = "=?unknown-8bit?q?=A4?==?utf-8?q?=C2=A4?=\n"
    self._test(parser.get_unstructured(unfolded), expected)

    # Same input behind a 55-character lead-in: the expected output
    # places the utf-8 encoded word on a continuation line.
    lead = "0123456789 " * 5
    unfolded = lead + "=?unknown-8bit?q?=A4?=\xa4"
    expected = lead + "=?unknown-8bit?q?=A4?=\n =?utf-8?q?=C2=A4?=\n"
    self._test(parser.get_unstructured(unfolded), expected)
def test_ascii_after_unknown_not_combined(self):
    # An unknown-8bit encoded word directly followed by plain ASCII
    # text: the short form is expected to come back unchanged apart
    # from the trailing newline.
    short_value = "=?unknown-8bit?q?=A4?=abc"
    short_folded = "=?unknown-8bit?q?=A4?=abc\n"
    self._test(parser.get_unstructured(short_value), short_folded)

    # Behind a 55-character lead-in the ASCII text is expected on a
    # continuation line, as a separate utf-8 encoded word.
    lead = "0123456789 " * 5
    long_value = lead + "=?unknown-8bit?q?=A4?=abc"
    long_folded = lead + "=?unknown-8bit?q?=A4?=\n =?utf-8?q?abc?=\n"
    self._test(parser.get_unstructured(long_value), long_folded)
def test_unknown_after_unicode_not_combined(self):
    # A raw non-ASCII character directly followed by an unknown-8bit
    # encoded word: the expected output has a utf-8 encoded word for
    # the character, then the untouched unknown-8bit word.
    unfolded = (
        "\xa4"
        "=?unknown-8bit?q?=A4?="
    )
    expected = "=?utf-8?q?=C2=A4?==?unknown-8bit?q?=A4?=\n"
    self._test(parser.get_unstructured(unfolded), expected)

    # Behind a 55-character lead-in the unknown-8bit word is expected
    # on a continuation line of its own.
    lead = "0123456789 " * 5
    unfolded = lead + "\xa4=?unknown-8bit?q?=A4?="
    expected = lead + "=?utf-8?q?=C2=A4?=\n =?unknown-8bit?q?=A4?=\n"
    self._test(parser.get_unstructured(unfolded), expected)
def test_unknown_after_ascii_not_combined(self):
    # Plain ASCII text directly followed by an unknown-8bit encoded
    # word: the short form is expected to come back unchanged apart
    # from the trailing newline.
    short_value = (
        "abc"
        "=?unknown-8bit?q?=A4?="
    )
    short_folded = "abc=?unknown-8bit?q?=A4?=\n"
    self._test(parser.get_unstructured(short_value), short_folded)

    # Behind a 55-character lead-in (note the ASCII text is "abcd"
    # here) the text is expected to stay unencoded on the first line,
    # with the unknown-8bit word on a continuation line.
    lead = "0123456789 " * 5
    long_value = lead + "abcd=?unknown-8bit?q?=A4?="
    long_folded = lead + "abcd\n =?unknown-8bit?q?=A4?=\n"
    self._test(parser.get_unstructured(long_value), long_folded)
def test_unknown_after_unknown(self):
    # Two adjacent unknown-8bit encoded words: in the short form they
    # are expected to be merged into a single unknown-8bit word.
    adjacent_words = (
        "=?unknown-8bit?q?=C2?="
        "=?unknown-8bit?q?=A4?="
    )
    merged = "=?unknown-8bit?q?=C2=A4?=\n"
    self._test(parser.get_unstructured(adjacent_words), merged)

    # Behind a 55-character lead-in the two words are expected to stay
    # separate, the second one moved to a continuation line.
    lead = "0123456789 " * 5
    padded_words = lead + (
        "=?unknown-8bit?q?=C2?="
        "=?unknown-8bit?q?=A4?="
    )
    split = lead + "=?unknown-8bit?q?=C2?=\n =?unknown-8bit?q?=A4?=\n"
    self._test(parser.get_unstructured(padded_words), split)
# XXX Need test of an encoded word so long that it needs to be wrapped
def test_simple_address(self):