mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell. The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
This commit is contained in:
parent
4b5e62dbb2
commit
ac22f6aa98
8 changed files with 416 additions and 22 deletions
|
|
@ -895,6 +895,93 @@ _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEnco
|
|||
return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.getstate
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
|
||||
/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
|
||||
{
|
||||
/* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
|
||||
for UTF-8 encoded buffer (each character can use up to 4
|
||||
bytes), and required bytes for MultibyteCodec_State.c. A byte
|
||||
array is used to avoid different compilers generating different
|
||||
values for the same state, e.g. as a result of struct padding.
|
||||
*/
|
||||
unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
|
||||
Py_ssize_t statesize;
|
||||
const char *pendingbuffer = NULL;
|
||||
Py_ssize_t pendingsize;
|
||||
|
||||
if (self->pending != NULL) {
|
||||
pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
|
||||
if (pendingbuffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (pendingsize > MAXENCPENDING*4) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
statebytes[0] = pendingsize;
|
||||
memcpy(statebytes+1, pendingbuffer, pendingsize);
|
||||
statesize = 1 + pendingsize;
|
||||
} else {
|
||||
statebytes[0] = 0;
|
||||
statesize = 1;
|
||||
}
|
||||
memcpy(statebytes+statesize, self->state.c,
|
||||
sizeof(self->state.c));
|
||||
statesize += sizeof(self->state.c);
|
||||
|
||||
return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
|
||||
1 /* little-endian */ ,
|
||||
0 /* unsigned */ );
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.setstate
|
||||
state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
|
||||
PyLongObject *statelong)
|
||||
/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
|
||||
{
|
||||
PyObject *pending = NULL;
|
||||
unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
|
||||
|
||||
if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
|
||||
1 /* little-endian */ ,
|
||||
0 /* unsigned */ ) < 0) {
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (statebytes[0] > MAXENCPENDING*4) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
|
||||
statebytes[0], "strict");
|
||||
if (pending == NULL) {
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
Py_CLEAR(self->pending);
|
||||
self->pending = pending;
|
||||
memcpy(self->state.c, statebytes+1+statebytes[0],
|
||||
sizeof(self->state.c));
|
||||
|
||||
Py_RETURN_NONE;
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(pending);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.reset
|
||||
[clinic start generated code]*/
|
||||
|
|
@ -919,6 +1006,8 @@ _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncod
|
|||
|
||||
static struct PyMethodDef mbiencoder_methods[] = {
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
|
@ -984,6 +1073,7 @@ mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
|
|||
{
|
||||
PyObject_GC_UnTrack(self);
|
||||
ERROR_DECREF(self->errors);
|
||||
Py_CLEAR(self->pending);
|
||||
Py_TYPE(self)->tp_free(self);
|
||||
}
|
||||
|
||||
|
|
@ -1119,6 +1209,68 @@ errorexit:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.getstate
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
|
||||
/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
|
||||
{
|
||||
PyObject *buffer;
|
||||
|
||||
buffer = PyBytes_FromStringAndSize((const char *)self->pending,
|
||||
self->pendingsize);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return make_tuple(buffer, (Py_ssize_t)*self->state.c);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.setstate
|
||||
state: object(subclass_of='&PyTuple_Type')
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
|
||||
PyObject *state)
|
||||
/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
|
||||
{
|
||||
PyObject *buffer;
|
||||
Py_ssize_t buffersize;
|
||||
char *bufferstr;
|
||||
unsigned long long flag;
|
||||
|
||||
if (!PyArg_ParseTuple(state, "SK;setstate(): illegal state argument",
|
||||
&buffer, &flag))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffersize = PyBytes_Size(buffer);
|
||||
if (buffersize == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (buffersize > MAXDECPENDING) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bufferstr = PyBytes_AsString(buffer);
|
||||
if (bufferstr == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
self->pendingsize = buffersize;
|
||||
memcpy(self->pending, bufferstr, self->pendingsize);
|
||||
memcpy(self->state.c, (unsigned char *)&flag, sizeof(flag));
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.reset
|
||||
[clinic start generated code]*/
|
||||
|
|
@ -1137,6 +1289,8 @@ _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecod
|
|||
|
||||
static struct PyMethodDef mbidecoder_methods[] = {
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue