mirror of
https://github.com/python/cpython.git
synced 2025-09-16 05:36:29 +00:00
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
This commit is contained in:
parent
437e6a3b15
commit
6e39080649
12 changed files with 999 additions and 2 deletions
|
@ -391,6 +391,126 @@ utf_16_ex_decode(PyObject *self,
|
|||
return tuple;
|
||||
}
|
||||
|
||||
/* utf_32_decode(data[, errors[, final]])

   Decodes a UTF-32 byte buffer with PyUnicode_DecodeUTF32Stateful(),
   starting from byte order 0 (no order forced by this wrapper).

   The result is whatever codec_tuple() builds from the decoded object
   and the number of bytes consumed; NULL is returned on failure. */
static PyObject *
utf_32_decode(PyObject *self,
              PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order = 0;
    int is_final = 0;
    Py_ssize_t used;
    Py_ssize_t *used_slot;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
                          &buf, &buflen, &errmode, &is_final))
        return NULL;

    if (buflen < 0) {
        PyErr_SetString(PyExc_ValueError, "negative argument");
        return NULL;
    }

    /* A final call hands the decoder no "consumed" slot, so the whole
       input is reported as used.  Otherwise the decoder overwrites
       this value through used_slot. */
    used = buflen;
    used_slot = is_final ? NULL : &used;

    result = PyUnicode_DecodeUTF32Stateful(buf, buflen, errmode,
                                           &order, used_slot);
    if (result == NULL)
        return NULL;

    return codec_tuple(result, used);
}
|
||||
|
||||
/* utf_32_le_decode(data[, errors[, final]])

   Same as utf_32_decode, except that the byte order handed to
   PyUnicode_DecodeUTF32Stateful() starts at -1 (the value the
   little-endian encoder in this file also uses).

   The result is whatever codec_tuple() builds from the decoded object
   and the number of bytes consumed; NULL is returned on failure. */
static PyObject *
utf_32_le_decode(PyObject *self,
                 PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order = -1;
    int is_final = 0;
    Py_ssize_t used;
    Py_ssize_t *used_slot;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
                          &buf, &buflen, &errmode, &is_final))
        return NULL;

    if (buflen < 0) {
        PyErr_SetString(PyExc_ValueError, "negative argument");
        return NULL;
    }

    /* A final call hands the decoder no "consumed" slot, so the whole
       input is reported as used.  Otherwise the decoder overwrites
       this value through used_slot. */
    used = buflen;
    used_slot = is_final ? NULL : &used;

    result = PyUnicode_DecodeUTF32Stateful(buf, buflen, errmode,
                                           &order, used_slot);
    if (result == NULL)
        return NULL;

    return codec_tuple(result, used);
}
|
||||
|
||||
/* utf_32_be_decode(data[, errors[, final]])

   Same as utf_32_decode, except that the byte order handed to
   PyUnicode_DecodeUTF32Stateful() starts at 1 (the value the
   big-endian encoder in this file also uses).

   The result is whatever codec_tuple() builds from the decoded object
   and the number of bytes consumed; NULL is returned on failure. */
static PyObject *
utf_32_be_decode(PyObject *self,
                 PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order = 1;
    int is_final = 0;
    Py_ssize_t used;
    Py_ssize_t *used_slot;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
                          &buf, &buflen, &errmode, &is_final))
        return NULL;

    if (buflen < 0) {
        PyErr_SetString(PyExc_ValueError, "negative argument");
        return NULL;
    }

    /* A final call hands the decoder no "consumed" slot, so the whole
       input is reported as used.  Otherwise the decoder overwrites
       this value through used_slot. */
    used = buflen;
    used_slot = is_final ? NULL : &used;

    result = PyUnicode_DecodeUTF32Stateful(buf, buflen, errmode,
                                           &order, used_slot);
    if (result == NULL)
        return NULL;

    return codec_tuple(result, used);
}
|
||||
|
||||
/* This non-standard version also provides access to the byteorder
|
||||
parameter of the builtin UTF-32 codec.
|
||||
|
||||
It returns a tuple (unicode, bytesread, byteorder) with byteorder
|
||||
being the value in effect at the end of data.
|
||||
|
||||
*/
|
||||
|
||||
static PyObject *
|
||||
utf_32_ex_decode(PyObject *self,
|
||||
PyObject *args)
|
||||
{
|
||||
const char *data;
|
||||
Py_ssize_t size;
|
||||
const char *errors = NULL;
|
||||
int byteorder = 0;
|
||||
PyObject *unicode, *tuple;
|
||||
int final = 0;
|
||||
Py_ssize_t consumed;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
|
||||
&data, &size, &errors, &byteorder, &final))
|
||||
return NULL;
|
||||
if (size < 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "negative argument");
|
||||
return 0;
|
||||
}
|
||||
consumed = size; /* This is overwritten unless final is true. */
|
||||
unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
|
||||
final ? NULL : &consumed);
|
||||
if (unicode == NULL)
|
||||
return NULL;
|
||||
tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
|
||||
Py_DECREF(unicode);
|
||||
return tuple;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
unicode_escape_decode(PyObject *self,
|
||||
PyObject *args)
|
||||
|
@ -683,6 +803,83 @@ utf_16_be_encode(PyObject *self,
|
|||
return v;
|
||||
}
|
||||
|
||||
/* This version provides access to the byteorder parameter of the
|
||||
builtin UTF-32 codecs as optional third argument. It defaults to 0
|
||||
which means: use the native byte order and prepend the data with a
|
||||
BOM mark.
|
||||
|
||||
*/
|
||||
|
||||
static PyObject *
|
||||
utf_32_encode(PyObject *self,
|
||||
PyObject *args)
|
||||
{
|
||||
PyObject *str, *v;
|
||||
const char *errors = NULL;
|
||||
int byteorder = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
|
||||
&str, &errors, &byteorder))
|
||||
return NULL;
|
||||
|
||||
str = PyUnicode_FromObject(str);
|
||||
if (str == NULL)
|
||||
return NULL;
|
||||
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
|
||||
PyUnicode_GET_SIZE(str),
|
||||
errors,
|
||||
byteorder),
|
||||
PyUnicode_GET_SIZE(str));
|
||||
Py_DECREF(str);
|
||||
return v;
|
||||
}
|
||||
|
||||
/* utf_32_le_encode(obj[, errors])

   Encodes obj with PyUnicode_EncodeUTF32() using a fixed byteorder
   argument of -1.

   The result is whatever codec_tuple() builds from the encoded object
   and the length of the unicode input; NULL is returned if the
   arguments cannot be parsed or obj cannot be turned into unicode. */
static PyObject *
utf_32_le_encode(PyObject *self,
                 PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errmode = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
                          &input, &errmode))
        return NULL;

    text = PyUnicode_FromObject(input);
    if (text == NULL)
        return NULL;

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), length,
                                    errmode, -1);

    /* encoded is forwarded to codec_tuple() as-is, including a failed
       (NULL) encode; text is released afterwards in either case. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
||||
|
||||
/* utf_32_be_encode(obj[, errors])

   Encodes obj with PyUnicode_EncodeUTF32() using a fixed byteorder
   argument of +1.

   The result is whatever codec_tuple() builds from the encoded object
   and the length of the unicode input; NULL is returned if the
   arguments cannot be parsed or obj cannot be turned into unicode. */
static PyObject *
utf_32_be_encode(PyObject *self,
                 PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errmode = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
                          &input, &errmode))
        return NULL;

    text = PyUnicode_FromObject(input);
    if (text == NULL)
        return NULL;

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), length,
                                    errmode, +1);

    /* encoded is forwarded to codec_tuple() as-is, including a failed
       (NULL) encode; text is released afterwards in either case. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
||||
|
||||
static PyObject *
|
||||
unicode_escape_encode(PyObject *self,
|
||||
PyObject *args)
|
||||
|
@ -901,6 +1098,13 @@ static PyMethodDef _codecs_functions[] = {
|
|||
{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
|
||||
{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
|
||||
{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
|
||||
{"utf_32_encode", utf_32_encode, METH_VARARGS},
|
||||
{"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
|
||||
{"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
|
||||
{"utf_32_decode", utf_32_decode, METH_VARARGS},
|
||||
{"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
|
||||
{"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
|
||||
{"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
|
||||
{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
|
||||
{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
|
||||
{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue