Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.

This commit is contained in:
Walter Dörwald 2007-08-17 16:41:28 +00:00
parent 437e6a3b15
commit 6e39080649
12 changed files with 999 additions and 2 deletions

View file

@ -391,6 +391,126 @@ utf_16_ex_decode(PyObject *self,
return tuple;
}
static PyObject *
utf_32_decode(PyObject *self,
PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
int byteorder = 0;
int final = 0;
Py_ssize_t consumed;
PyObject *decoded;
if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
&data, &size, &errors, &final))
return NULL;
if (size < 0) {
PyErr_SetString(PyExc_ValueError, "negative argument");
return 0;
}
consumed = size; /* This is overwritten unless final is true. */
decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
final ? NULL : &consumed);
if (decoded == NULL)
return NULL;
return codec_tuple(decoded, consumed);
}
static PyObject *
utf_32_le_decode(PyObject *self,
PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
int byteorder = -1;
int final = 0;
Py_ssize_t consumed;
PyObject *decoded = NULL;
if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
&data, &size, &errors, &final))
return NULL;
if (size < 0) {
PyErr_SetString(PyExc_ValueError, "negative argument");
return 0;
}
consumed = size; /* This is overwritten unless final is true. */
decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
&byteorder, final ? NULL : &consumed);
if (decoded == NULL)
return NULL;
return codec_tuple(decoded, consumed);
}
static PyObject *
utf_32_be_decode(PyObject *self,
PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
int byteorder = 1;
int final = 0;
Py_ssize_t consumed;
PyObject *decoded = NULL;
if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
&data, &size, &errors, &final))
return NULL;
if (size < 0) {
PyErr_SetString(PyExc_ValueError, "negative argument");
return 0;
}
consumed = size; /* This is overwritten unless final is true. */
decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
&byteorder, final ? NULL : &consumed);
if (decoded == NULL)
return NULL;
return codec_tuple(decoded, consumed);
}
/* This non-standard version also provides access to the byteorder
parameter of the builtin UTF-32 codec.
It returns a tuple (unicode, bytesread, byteorder) with byteorder
being the value in effect at the end of data.
*/
static PyObject *
utf_32_ex_decode(PyObject *self,
PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
int byteorder = 0;
PyObject *unicode, *tuple;
int final = 0;
Py_ssize_t consumed;
if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
&data, &size, &errors, &byteorder, &final))
return NULL;
if (size < 0) {
PyErr_SetString(PyExc_ValueError, "negative argument");
return 0;
}
consumed = size; /* This is overwritten unless final is true. */
unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
final ? NULL : &consumed);
if (unicode == NULL)
return NULL;
tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
Py_DECREF(unicode);
return tuple;
}
static PyObject *
unicode_escape_decode(PyObject *self,
PyObject *args)
@ -683,6 +803,83 @@ utf_16_be_encode(PyObject *self,
return v;
}
/* This version provides access to the byteorder parameter of the
builtin UTF-32 codecs as optional third argument. It defaults to 0
which means: use the native byte order and prepend the data with a
BOM mark.
*/
static PyObject *
utf_32_encode(PyObject *self,
PyObject *args)
{
PyObject *str, *v;
const char *errors = NULL;
int byteorder = 0;
if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
&str, &errors, &byteorder))
return NULL;
str = PyUnicode_FromObject(str);
if (str == NULL)
return NULL;
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
PyUnicode_GET_SIZE(str),
errors,
byteorder),
PyUnicode_GET_SIZE(str));
Py_DECREF(str);
return v;
}
static PyObject *
utf_32_le_encode(PyObject *self,
PyObject *args)
{
PyObject *str, *v;
const char *errors = NULL;
if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
&str, &errors))
return NULL;
str = PyUnicode_FromObject(str);
if (str == NULL)
return NULL;
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
PyUnicode_GET_SIZE(str),
errors,
-1),
PyUnicode_GET_SIZE(str));
Py_DECREF(str);
return v;
}
static PyObject *
utf_32_be_encode(PyObject *self,
PyObject *args)
{
PyObject *str, *v;
const char *errors = NULL;
if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
&str, &errors))
return NULL;
str = PyUnicode_FromObject(str);
if (str == NULL)
return NULL;
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
PyUnicode_GET_SIZE(str),
errors,
+1),
PyUnicode_GET_SIZE(str));
Py_DECREF(str);
return v;
}
static PyObject *
unicode_escape_encode(PyObject *self,
PyObject *args)
@ -901,6 +1098,13 @@ static PyMethodDef _codecs_functions[] = {
{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
{"utf_32_encode", utf_32_encode, METH_VARARGS},
{"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
{"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
{"utf_32_decode", utf_32_decode, METH_VARARGS},
{"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
{"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
{"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},