bpo-21315: Fix parsing of encoded words with missing leading ws. (#13425)

* bpo-21315: Fix parsing of encoded words with missing leading ws. Because of the missing leading whitespace, the encoded word would get parsed as an unstructured token. This patch fixes that by looking for encoded words when splitting tokens with whitespace. Missing trailing whitespace around an encoded word now registers a defect instead. Original patch suggestion by David R. Murray on bpo-21315.
2025-11-25 21:11:09 +00:00 · 2019-06-05 12:56:33 -04:00 · 2019-06-05 12:56:33 -04:00 · 66c4f3f38b
commit 66c4f3f38b
parent 142566c028
4 changed files with 49 additions and 3 deletions
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -118,7 +118,7 @@ class TestParser(TestParserMixin, TestEmailBase):
                         '=?us-ascii?q?first?==?utf-8?q?second?=',
                         'first',
                         'first',
-                         [],
+                         [errors.InvalidHeaderDefect],
                         '=?utf-8?q?second?=')

    def test_get_encoded_word_sets_extra_attributes(self):
@@ -361,6 +361,25 @@ class TestParser(TestParserMixin, TestEmailBase):
            '=?utf-8?q?foo?==?utf-8?q?bar?=',
            'foobar',
            'foobar',
+            [errors.InvalidHeaderDefect,
+            errors.InvalidHeaderDefect],
+            '')
+
+    def test_get_unstructured_ew_without_leading_whitespace(self):
+        self._test_get_x(
+            self._get_unst,
+            'nowhitespace=?utf-8?q?somevalue?=',
+            'nowhitespacesomevalue',
+            'nowhitespacesomevalue',
+            [errors.InvalidHeaderDefect],
+            '')
+
+    def test_get_unstructured_ew_without_trailing_whitespace(self):
+        self._test_get_x(
+            self._get_unst,
+            '=?utf-8?q?somevalue?=nowhitespace',
+            'somevaluenowhitespace',
+            'somevaluenowhitespace',
            [errors.InvalidHeaderDefect],
            '')

@@ -546,7 +565,8 @@ class TestParser(TestParserMixin, TestEmailBase):
            '"=?utf-8?Q?not_really_valid?="',
            '"not really valid"',
            'not really valid',
-            [errors.InvalidHeaderDefect],
+            [errors.InvalidHeaderDefect,
+             errors.InvalidHeaderDefect],
            '')

    # get_comment