mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
bpo-24214: Fixed the UTF-8 incremental decoder. (GH-12603)
The bug occurred when an encoded surrogate character was passed to the incremental decoder in two chunks.
This commit is contained in:
parent
38f4e468d4
commit
7a465cb5ee
3 changed files with 14 additions and 0 deletions
|
@ -406,6 +406,15 @@ class ReadTest(MixInCheckStateHandling):
|
||||||
self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),
|
self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),
|
||||||
before + backslashreplace + after)
|
before + backslashreplace + after)
|
||||||
|
|
||||||
|
def test_incremental_surrogatepass(self):
|
||||||
|
# Test incremental decoder for surrogatepass handler:
|
||||||
|
# see issue #24214
|
||||||
|
data = '\uD901'.encode(self.encoding, 'surrogatepass')
|
||||||
|
for i in range(1, len(data)):
|
||||||
|
dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
|
||||||
|
self.assertEqual(dec.decode(data[:i]), '')
|
||||||
|
self.assertEqual(dec.decode(data[i:], True), '\uD901')
|
||||||
|
|
||||||
|
|
||||||
class UTF32Test(ReadTest, unittest.TestCase):
|
class UTF32Test(ReadTest, unittest.TestCase):
|
||||||
encoding = "utf-32"
|
encoding = "utf-32"
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fixed support of the surrogatepass error handler in the UTF-8 incremental
|
||||||
|
decoder.
|
|
@ -4883,6 +4883,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||||
case 2:
|
case 2:
|
||||||
case 3:
|
case 3:
|
||||||
case 4:
|
case 4:
|
||||||
|
if (s == end || consumed) {
|
||||||
|
goto End;
|
||||||
|
}
|
||||||
errmsg = "invalid continuation byte";
|
errmsg = "invalid continuation byte";
|
||||||
startinpos = s - starts;
|
startinpos = s - starts;
|
||||||
endinpos = startinpos + ch - 1;
|
endinpos = startinpos + ch - 1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue