mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-24214: Fixed the UTF-8 incremental decoder. (GH-12603)
The bug occurred when the encoded surrogate character was passed to the incremental decoder in two chunks.
This commit is contained in:
parent
38f4e468d4
commit
7a465cb5ee
3 changed files with 14 additions and 0 deletions
|
@ -406,6 +406,15 @@ class ReadTest(MixInCheckStateHandling):
|
|||
self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),
|
||||
before + backslashreplace + after)
|
||||
|
||||
def test_incremental_surrogatepass(self):
|
||||
# Test incremental decoder for surrogatepass handler:
|
||||
# see issue #24214
|
||||
data = '\uD901'.encode(self.encoding, 'surrogatepass')
|
||||
for i in range(1, len(data)):
|
||||
dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
|
||||
self.assertEqual(dec.decode(data[:i]), '')
|
||||
self.assertEqual(dec.decode(data[i:], True), '\uD901')
|
||||
|
||||
|
||||
class UTF32Test(ReadTest, unittest.TestCase):
|
||||
encoding = "utf-32"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue