bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939)

They support now splitting escape sequences between input chunks.

Add the third parameter "final" in codecs.unicode_escape_decode().
It is True by default to match the former behavior.
This commit is contained in:
Serhiy Storchaka 2021-10-14 13:17:00 +03:00 committed by GitHub
parent e71662c1ae
commit c96d1546b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 123 additions and 34 deletions

View file

@ -114,7 +114,7 @@ class ReadTest(MixInCheckStateHandling):
q = Queue(b"")
r = codecs.getreader(self.encoding)(q)
result = ""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
q.write(bytes([c]))
result += r.read()
self.assertEqual(result, partialresult)
@ -125,7 +125,7 @@ class ReadTest(MixInCheckStateHandling):
# do the check again, this time using an incremental decoder
d = codecs.getincrementaldecoder(self.encoding)()
result = ""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
result += d.decode(bytes([c]))
self.assertEqual(result, partialresult)
# check that there's nothing left in the buffers
@ -135,7 +135,7 @@ class ReadTest(MixInCheckStateHandling):
# Check whether the reset method works properly
d.reset()
result = ""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
result += d.decode(bytes([c]))
self.assertEqual(result, partialresult)
# check that there's nothing left in the buffers
@ -2353,7 +2353,11 @@ class TypesTest(unittest.TestCase):
(r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
class UnicodeEscapeTest(unittest.TestCase):
class UnicodeEscapeTest(ReadTest, unittest.TestCase):
encoding = "unicode-escape"
test_lone_surrogates = None
def test_empty(self):
self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
@ -2440,6 +2444,44 @@ class UnicodeEscapeTest(unittest.TestCase):
self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
def test_partial(self):
self.check_partial(
"\x00\t\n\r\\\xff\uffff\U00010000",
[
'',
'',
'',
'\x00',
'\x00',
'\x00\t',
'\x00\t',
'\x00\t\n',
'\x00\t\n',
'\x00\t\n\r',
'\x00\t\n\r',
'\x00\t\n\r\\',
'\x00\t\n\r\\',
'\x00\t\n\r\\',
'\x00\t\n\r\\',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff',
'\x00\t\n\r\\\xff\uffff\U00010000',
]
)
class RawUnicodeEscapeTest(unittest.TestCase):
def test_empty(self):