mirror of
https://github.com/python/cpython.git
synced 2025-10-09 08:31:26 +00:00
bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate() and setstate() for the cjkcodecs multibyte incremental encoders and decoders, primarily to fix issues with seek/tell. The encoder's getstate/setstate is slightly tricky: its "state" consists of the pending bytes plus the MultibyteCodec_State, but only an integer can be returned. The approach taken here is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
This commit is contained in:
parent
4b5e62dbb2
commit
ac22f6aa98
8 changed files with 416 additions and 22 deletions
|
@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase):
|
|||
finally:
|
||||
StatefulIncrementalDecoder.codecEnabled = 0
|
||||
|
||||
def test_multibyte_seek_and_tell(self):
    # Round-trip a tell() cookie through seek() on a file decoded with a
    # multibyte (CJK) codec: after seeking back, the same line must be read
    # again and tell() must report the same position as before.
    #
    # Context managers guarantee the file is closed even when one of the
    # assertions fails, so a failing run does not leak the handle.
    with self.open(support.TESTFN, "w", encoding="euc_jp") as f:
        f.write("AB\n\u3046\u3048\n")

    with self.open(support.TESTFN, "r", encoding="euc_jp") as f:
        self.assertEqual(f.readline(), "AB\n")
        # Position just before the line of multibyte characters.
        p0 = f.tell()
        self.assertEqual(f.readline(), "\u3046\u3048\n")
        # Position just after the line of multibyte characters.
        p1 = f.tell()
        f.seek(p0)
        self.assertEqual(f.readline(), "\u3046\u3048\n")
        self.assertEqual(f.tell(), p1)
|
||||
|
||||
def test_seek_with_encoder_state(self):
    # Seeking on a file opened for writing must restore the encoder state
    # that was in effect when the tell() cookie was taken.
    #
    # Context managers guarantee the file is closed even when an assertion
    # or a write fails; leaving the first block also closes the writer
    # before the file is reopened for reading.
    with self.open(support.TESTFN, "w", encoding="euc_jis_2004") as f:
        f.write("\u00e6\u0300")
        p0 = f.tell()
        # This U+00E6 is written after the cookie was taken; the expected
        # result below contains only one U+00E6, so seek(p0) is expected
        # to discard it.
        # NOTE(review): presumably the encoder holds U+00E6 as pending
        # input awaiting a possible combining mark -- confirm against the
        # euc_jis_2004 codec.
        f.write("\u00e6")
        f.seek(p0)
        f.write("\u0300")

    with self.open(support.TESTFN, "r", encoding="euc_jis_2004") as f:
        self.assertEqual(f.readline(), "\u00e6\u0300\u0300")
|
||||
|
||||
def test_encoded_writes(self):
|
||||
data = "1234567890"
|
||||
tests = ("utf-16",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue