[3.10] bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939) (GH-28943)

They now support splitting escape sequences between input chunks. Add a third parameter, "final", to codecs.unicode_escape_decode(). It is True by default to match the former behavior. (cherry picked from commit c96d1546b1) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
2025-08-15 22:30:42 +00:00 · 2021-10-14 10:02:20 -07:00 · 2021-10-14 10:02:20 -07:00 · 0bff4ccbfd
commit 0bff4ccbfd
parent 70b150a366
9 changed files with 10949 additions and 5895 deletions
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -114,7 +114,7 @@ class ReadTest(MixInCheckStateHandling):
        q = Queue(b"")
        r = codecs.getreader(self.encoding)(q)
        result = ""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+        for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
            q.write(bytes([c]))
            result += r.read()
            self.assertEqual(result, partialresult)
@@ -125,7 +125,7 @@ class ReadTest(MixInCheckStateHandling):
        # do the check again, this time using an incremental decoder
        d = codecs.getincrementaldecoder(self.encoding)()
        result = ""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+        for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
            result += d.decode(bytes([c]))
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
@@ -135,7 +135,7 @@ class ReadTest(MixInCheckStateHandling):
        # Check whether the reset method works properly
        d.reset()
        result = ""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
+        for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
            result += d.decode(bytes([c]))
            self.assertEqual(result, partialresult)
        # check that there's nothing left in the buffers
@@ -2341,7 +2341,11 @@ class TypesTest(unittest.TestCase):
                         (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))


-class UnicodeEscapeTest(unittest.TestCase):
+class UnicodeEscapeTest(ReadTest, unittest.TestCase):
+    encoding = "unicode-escape"
+
+    test_lone_surrogates = None
+
    def test_empty(self):
        self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
        self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
@@ -2428,6 +2432,44 @@ class UnicodeEscapeTest(unittest.TestCase):
        self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
        self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))

+    def test_partial(self):
+        self.check_partial(
+            "\x00\t\n\r\\\xff\uffff\U00010000",
+            [
+                '',
+                '',
+                '',
+                '\x00',
+                '\x00',
+                '\x00\t',
+                '\x00\t',
+                '\x00\t\n',
+                '\x00\t\n',
+                '\x00\t\n\r',
+                '\x00\t\n\r',
+                '\x00\t\n\r\\',
+                '\x00\t\n\r\\',
+                '\x00\t\n\r\\',
+                '\x00\t\n\r\\',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff',
+                '\x00\t\n\r\\\xff\uffff\U00010000',
+            ]
+        )

 class RawUnicodeEscapeTest(unittest.TestCase):
    def test_empty(self):