mirror of
https://github.com/python/cpython.git
synced 2025-12-15 21:44:50 +00:00
bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell. The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
This commit is contained in:
parent
4b5e62dbb2
commit
ac22f6aa98
8 changed files with 416 additions and 22 deletions
|
|
@@ -51,6 +51,12 @@
|
|||
; \
|
||||
}
|
||||
|
||||
/*
|
||||
* codecs in this file use the first byte of MultibyteCodec_State.c[8]
|
||||
* to store a 0 or 1 state value
|
||||
*/
|
||||
#define CN_STATE_OFFSET 0 /* index into state->c[] holding that flag; the hz codec uses 0 = ASCII mode, 1 = GB mode */
|
||||
|
||||
/*
|
||||
* GB2312 codec
|
||||
*/
|
||||
|
|
@@ -329,15 +335,15 @@ DECODER(gb18030)
|
|||
|
||||
/* HZ encoder initialisation: starts the encoder with the mode flag cleared.
 * ENCODER_INIT is a macro defined outside this view; presumably it supplies
 * the `state` pointer used below -- confirm against cjkcodecs.h.
 * This is a diff hunk, so the old and the new assignment both appear. */
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0; /* pre-change line (removed): flag kept in the integer member */
|
||||
state->c[CN_STATE_OFFSET] = 0; /* post-change line (added): flag kept in byte CN_STATE_OFFSET of state->c */
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
if (state->c[CN_STATE_OFFSET] != 0) {
|
||||
WRITEBYTE2('~', '}');
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
NEXT_OUT(2);
|
||||
}
|
||||
return 0;
|
||||
|
|
@@ -350,10 +356,10 @@ ENCODER(hz)
|
|||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i) {
|
||||
if (state->c[CN_STATE_OFFSET]) {
|
||||
WRITEBYTE2('~', '}');
|
||||
NEXT_OUT(2);
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
}
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
|
|
@@ -375,10 +381,10 @@ ENCODER(hz)
|
|||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
if (state->c[CN_STATE_OFFSET] == 0) {
|
||||
WRITEBYTE4('~', '{', code >> 8, code & 0xff);
|
||||
NEXT(1, 4);
|
||||
state->i = 1;
|
||||
state->c[CN_STATE_OFFSET] = 1;
|
||||
}
|
||||
else {
|
||||
WRITEBYTE2(code >> 8, code & 0xff);
|
||||
|
|
@@ -391,13 +397,13 @@ ENCODER(hz)
|
|||
|
||||
/* HZ decoder initialisation: starts the decoder with the mode flag at 0,
 * which the decoder body treats as ASCII mode.
 * DECODER_INIT is a macro defined outside this view; presumably it supplies
 * the `state` pointer used below -- confirm against cjkcodecs.h.
 * This is a diff hunk, so the old and the new assignment both appear. */
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0; /* pre-change line (removed): flag kept in the integer member */
|
||||
state->c[CN_STATE_OFFSET] = 0; /* post-change line (added): flag kept in byte CN_STATE_OFFSET of state->c */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* HZ decoder reset: puts the mode flag back to 0 (ASCII mode), the same
 * value the decoder init hook sets. Unlike the encoder reset, nothing is
 * written to the output here.
 * This is a diff hunk, so the old and the new assignment both appear. */
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0; /* pre-change line (removed): flag kept in the integer member */
|
||||
state->c[CN_STATE_OFFSET] = 0; /* post-change line (added): flag kept in byte CN_STATE_OFFSET of state->c */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@@ -411,14 +417,14 @@ DECODER(hz)
|
|||
unsigned char c2 = INBYTE2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (c2 == '~' && state->i == 0)
|
||||
if (c2 == '~' && state->c[CN_STATE_OFFSET] == 0)
|
||||
OUTCHAR('~');
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '\n' && state->i == 0)
|
||||
else if (c2 == '{' && state->c[CN_STATE_OFFSET] == 0)
|
||||
state->c[CN_STATE_OFFSET] = 1; /* set GB */
|
||||
else if (c2 == '\n' && state->c[CN_STATE_OFFSET] == 0)
|
||||
; /* line-continuation */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '}' && state->c[CN_STATE_OFFSET] == 1)
|
||||
state->c[CN_STATE_OFFSET] = 0; /* set ASCII */
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
|
|
@@ -428,7 +434,7 @@ DECODER(hz)
|
|||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
if (state->c[CN_STATE_OFFSET] == 0) { /* ASCII mode */
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue