mirror of
https://github.com/python/cpython.git
synced 2025-08-15 22:30:42 +00:00
[3.10] bpo-45461: Fix IncrementalDecoder and StreamReader in the "unicode-escape" codec (GH-28939) (GH-28943)
They now support escape sequences that are split between input chunks.
Add a third parameter, "final", to codecs.unicode_escape_decode().
It is True by default to match the former behavior.
(cherry picked from commit c96d1546b1)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
70b150a366
commit
0bff4ccbfd
9 changed files with 10949 additions and 5895 deletions
|
@ -114,7 +114,7 @@ class ReadTest(MixInCheckStateHandling):
|
|||
q = Queue(b"")
|
||||
r = codecs.getreader(self.encoding)(q)
|
||||
result = ""
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
|
||||
q.write(bytes([c]))
|
||||
result += r.read()
|
||||
self.assertEqual(result, partialresult)
|
||||
|
@ -125,7 +125,7 @@ class ReadTest(MixInCheckStateHandling):
|
|||
# do the check again, this time using an incremental decoder
|
||||
d = codecs.getincrementaldecoder(self.encoding)()
|
||||
result = ""
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
|
||||
result += d.decode(bytes([c]))
|
||||
self.assertEqual(result, partialresult)
|
||||
# check that there's nothing left in the buffers
|
||||
|
@ -135,7 +135,7 @@ class ReadTest(MixInCheckStateHandling):
|
|||
# Check whether the reset method works properly
|
||||
d.reset()
|
||||
result = ""
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
|
||||
for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
|
||||
result += d.decode(bytes([c]))
|
||||
self.assertEqual(result, partialresult)
|
||||
# check that there's nothing left in the buffers
|
||||
|
@ -2341,7 +2341,11 @@ class TypesTest(unittest.TestCase):
|
|||
(r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
|
||||
|
||||
|
||||
class UnicodeEscapeTest(unittest.TestCase):
|
||||
class UnicodeEscapeTest(ReadTest, unittest.TestCase):
|
||||
encoding = "unicode-escape"
|
||||
|
||||
test_lone_surrogates = None
|
||||
|
||||
def test_empty(self):
|
||||
self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
|
||||
self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
|
||||
|
@ -2428,6 +2432,44 @@ class UnicodeEscapeTest(unittest.TestCase):
|
|||
self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
|
||||
self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
|
||||
|
||||
def test_partial(self):
|
||||
self.check_partial(
|
||||
"\x00\t\n\r\\\xff\uffff\U00010000",
|
||||
[
|
||||
'',
|
||||
'',
|
||||
'',
|
||||
'\x00',
|
||||
'\x00',
|
||||
'\x00\t',
|
||||
'\x00\t',
|
||||
'\x00\t\n',
|
||||
'\x00\t\n',
|
||||
'\x00\t\n\r',
|
||||
'\x00\t\n\r',
|
||||
'\x00\t\n\r\\',
|
||||
'\x00\t\n\r\\',
|
||||
'\x00\t\n\r\\',
|
||||
'\x00\t\n\r\\',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff',
|
||||
'\x00\t\n\r\\\xff\uffff\U00010000',
|
||||
]
|
||||
)
|
||||
|
||||
class RawUnicodeEscapeTest(unittest.TestCase):
|
||||
def test_empty(self):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue