bpo-33578: Add getstate/setstate for CJK codec (GH-6984)

This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell.

The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long.


https://bugs.python.org/issue33578
This commit is contained in:
Christopher Thorne 2018-11-01 10:48:49 +00:00 committed by Miss Islington (bot)
parent 4b5e62dbb2
commit ac22f6aa98
8 changed files with 416 additions and 22 deletions

View file

@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase):
finally:
StatefulIncrementalDecoder.codecEnabled = 0
def test_multibyte_seek_and_tell(self):
f = self.open(support.TESTFN, "w", encoding="euc_jp")
f.write("AB\n\u3046\u3048\n")
f.close()
f = self.open(support.TESTFN, "r", encoding="euc_jp")
self.assertEqual(f.readline(), "AB\n")
p0 = f.tell()
self.assertEqual(f.readline(), "\u3046\u3048\n")
p1 = f.tell()
f.seek(p0)
self.assertEqual(f.readline(), "\u3046\u3048\n")
self.assertEqual(f.tell(), p1)
f.close()
def test_seek_with_encoder_state(self):
f = self.open(support.TESTFN, "w", encoding="euc_jis_2004")
f.write("\u00e6\u0300")
p0 = f.tell()
f.write("\u00e6")
f.seek(p0)
f.write("\u0300")
f.close()
f = self.open(support.TESTFN, "r", encoding="euc_jis_2004")
self.assertEqual(f.readline(), "\u00e6\u0300\u0300")
f.close()
def test_encoded_writes(self):
data = "1234567890"
tests = ("utf-16",