mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
SF patch #998993: The UTF-8 and UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument, chars, which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument, keepends; trailing "\n"s will be stripped from the lines if keepends is false. Added the C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
This commit is contained in:
parent
a708d6e3b0
commit
69652035bc
12 changed files with 419 additions and 173 deletions
|
@ -269,13 +269,20 @@ utf_8_decode(PyObject *self,
|
|||
const char *data;
|
||||
int size;
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
|
||||
&data, &size, &errors))
|
||||
return NULL;
|
||||
int final = 0;
|
||||
int consumed;
|
||||
PyObject *decoded = NULL;
|
||||
|
||||
return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
|
||||
size);
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
|
||||
&data, &size, &errors, &final))
|
||||
return NULL;
|
||||
consumed = size;
|
||||
|
||||
decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
|
||||
final ? NULL : &consumed);
|
||||
if (decoded == NULL)
|
||||
return NULL;
|
||||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -286,12 +293,19 @@ utf_16_decode(PyObject *self,
|
|||
int size;
|
||||
const char *errors = NULL;
|
||||
int byteorder = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
|
||||
&data, &size, &errors))
|
||||
int final = 0;
|
||||
int consumed;
|
||||
PyObject *decoded;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
|
||||
&data, &size, &errors, &final))
|
||||
return NULL;
|
||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
||||
size);
|
||||
consumed = size;
|
||||
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
|
||||
final ? NULL : &consumed);
|
||||
if (decoded == NULL)
|
||||
return NULL;
|
||||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -302,12 +316,20 @@ utf_16_le_decode(PyObject *self,
|
|||
int size;
|
||||
const char *errors = NULL;
|
||||
int byteorder = -1;
|
||||
int final = 0;
|
||||
int consumed;
|
||||
PyObject *decoded = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
|
||||
&data, &size, &errors))
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
|
||||
&data, &size, &errors, &final))
|
||||
return NULL;
|
||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
||||
size);
|
||||
consumed = size;
|
||||
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
|
||||
&byteorder, final ? NULL : &consumed);
|
||||
if (decoded == NULL)
|
||||
return NULL;
|
||||
return codec_tuple(decoded, consumed);
|
||||
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -318,12 +340,19 @@ utf_16_be_decode(PyObject *self,
|
|||
int size;
|
||||
const char *errors = NULL;
|
||||
int byteorder = 1;
|
||||
int final = 0;
|
||||
int consumed;
|
||||
PyObject *decoded = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
|
||||
&data, &size, &errors))
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
|
||||
&data, &size, &errors, &final))
|
||||
return NULL;
|
||||
return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
|
||||
size);
|
||||
consumed = size;
|
||||
decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
|
||||
&byteorder, final ? NULL : &consumed);
|
||||
if (decoded == NULL)
|
||||
return NULL;
|
||||
return codec_tuple(decoded, consumed);
|
||||
}
|
||||
|
||||
/* This non-standard version also provides access to the byteorder
|
||||
|
@ -343,15 +372,19 @@ utf_16_ex_decode(PyObject *self,
|
|||
const char *errors = NULL;
|
||||
int byteorder = 0;
|
||||
PyObject *unicode, *tuple;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
|
||||
&data, &size, &errors, &byteorder))
|
||||
int final = 0;
|
||||
int consumed;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
|
||||
&data, &size, &errors, &byteorder, &final))
|
||||
return NULL;
|
||||
|
||||
unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
|
||||
consumed = size;
|
||||
unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
|
||||
final ? NULL : &consumed);
|
||||
if (unicode == NULL)
|
||||
return NULL;
|
||||
tuple = Py_BuildValue("Oii", unicode, size, byteorder);
|
||||
tuple = Py_BuildValue("Oii", unicode, consumed, byteorder);
|
||||
Py_DECREF(unicode);
|
||||
return tuple;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue