[3.12] gh-99593: Add tests for Unicode C API (part 3) (GH-104728) (GH-106595)

Add tests for codecs.
(cherry picked from commit 51ea664d18)
This commit is contained in:
Serhiy Storchaka 2023-07-10 20:58:58 +03:00 committed by GitHub
parent 41057b2ffe
commit 2da967ea14
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 972 additions and 1 deletions

View file

@ -376,6 +376,22 @@ unicode_readchar(PyObject *self, PyObject *args)
return PyLong_FromUnsignedLong(result);
}
/* Test PyUnicode_FromEncodedObject().
 *
 * Python-level arguments: (obj, encoding[, errors]).  Both encoding and
 * errors are parsed with "z"; errors stays NULL when it is omitted.
 * obj goes through NULLABLE() (a macro defined elsewhere in this file;
 * presumably it maps None to NULL -- confirm against its definition).
 */
static PyObject *
unicode_fromencodedobject(PyObject *self, PyObject *args)
{
    PyObject *source;
    const char *codec;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "Oz|z", &source, &codec, &errmode)) {
        return NULL;
    }

    NULLABLE(source);
    return PyUnicode_FromEncodedObject(source, codec, errmode);
}
/* Test PyUnicode_FromObject() */
static PyObject *
unicode_fromobject(PyObject *self, PyObject *arg)
@ -669,6 +685,78 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg)
return _PyUnicode_TransformDecimalAndSpaceToASCII(arg);
}
/* Test PyUnicode_Decode().
 *
 * Python-level arguments: (data, encoding[, errors]).  data is read with
 * "y#" (pointer plus length); errors defaults to NULL when not supplied.
 */
static PyObject *
unicode_decode(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *codec;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#z|z", &buf, &buflen, &codec, &errmode)) {
        return NULL;
    }
    return PyUnicode_Decode(buf, buflen, codec, errmode);
}
/* Test PyUnicode_AsEncodedString().
 *
 * Python-level arguments: (unicode, encoding[, errors]).  errors defaults
 * to NULL when omitted.  unicode is run through NULLABLE() (defined
 * elsewhere in this file) before being handed to the C API.
 */
static PyObject *
unicode_asencodedstring(PyObject *self, PyObject *args)
{
    PyObject *text;
    const char *codec;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "Oz|z", &text, &codec, &errmode)) {
        return NULL;
    }

    NULLABLE(text);
    return PyUnicode_AsEncodedString(text, codec, errmode);
}
/* Test PyUnicode_BuildEncodingMap().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and returns whatever PyUnicode_BuildEncodingMap() returns.
 */
static PyObject *
unicode_buildencodingmap(PyObject *self, PyObject *arg)
{
    PyObject *string = arg;

    NULLABLE(string);
    return PyUnicode_BuildEncodingMap(string);
}
/* Test PyUnicode_DecodeUTF7().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decodeutf7(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeUTF7(buf, buflen, errmode);
}
/* Test PyUnicode_DecodeUTF7Stateful().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 * On success returns the tuple (decoded, consumed), where consumed is the
 * value the decoder stored through its output pointer.
 */
static PyObject *
unicode_decodeutf7stateful(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    Py_ssize_t consumed;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF7Stateful(buf, buflen, errmode, &consumed);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(Nn)", decoded, consumed);
}
/* Test PyUnicode_DecodeUTF8() */
static PyObject *
unicode_decodeutf8(PyObject *self, PyObject *args)
@ -703,6 +791,387 @@ unicode_decodeutf8stateful(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, consumed);
}
/* Test PyUnicode_AsUTF8String().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsUTF8String().
 */
static PyObject *
unicode_asutf8string(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsUTF8String(text);
}
/* Test PyUnicode_DecodeUTF32().
 *
 * Python-level arguments: (byteorder, data[, errors]); errors is NULL when
 * omitted.  byteorder is passed by address, and the value it holds after
 * the call is returned together with the result as (byteorder, decoded).
 */
static PyObject *
unicode_decodeutf32(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "iy#|z", &order, &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF32(buf, buflen, errmode, &order);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(iN)", order, decoded);
}
/* Test PyUnicode_DecodeUTF32Stateful().
 *
 * Python-level arguments: (byteorder, data[, errors]); errors is NULL when
 * omitted.  On success returns (byteorder, decoded, consumed), using the
 * byteorder and consumed values left behind by the decoder.
 */
static PyObject *
unicode_decodeutf32stateful(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order;
    Py_ssize_t consumed;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "iy#|z", &order, &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF32Stateful(buf, buflen, errmode,
                                            &order, &consumed);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(iNn)", order, decoded, consumed);
}
/* Test PyUnicode_AsUTF32String().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsUTF32String().
 */
static PyObject *
unicode_asutf32string(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsUTF32String(text);
}
/* Test PyUnicode_DecodeUTF16().
 *
 * Python-level arguments: (byteorder, data[, errors]); errors is NULL when
 * omitted.  byteorder is passed by address, and the value it holds after
 * the call is returned together with the result as (byteorder, decoded).
 */
static PyObject *
unicode_decodeutf16(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order = 0;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "iy#|z", &order, &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF16(buf, buflen, errmode, &order);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(iN)", order, decoded);
}
/* Test PyUnicode_DecodeUTF16Stateful().
 *
 * Python-level arguments: (byteorder, data[, errors]); errors is NULL when
 * omitted.  On success returns (byteorder, decoded, consumed), using the
 * byteorder and consumed values left behind by the decoder.
 */
static PyObject *
unicode_decodeutf16stateful(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    int order;
    Py_ssize_t consumed;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "iy#|z", &order, &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF16Stateful(buf, buflen, errmode,
                                            &order, &consumed);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(iNn)", order, decoded, consumed);
}
/* Test PyUnicode_AsUTF16String().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsUTF16String().
 */
static PyObject *
unicode_asutf16string(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsUTF16String(text);
}
/* Test PyUnicode_DecodeUnicodeEscape().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decodeunicodeescape(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeUnicodeEscape(buf, buflen, errmode);
}
/* Test PyUnicode_AsUnicodeEscapeString().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsUnicodeEscapeString().
 */
static PyObject *
unicode_asunicodeescapestring(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsUnicodeEscapeString(text);
}
/* Test PyUnicode_DecodeRawUnicodeEscape().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decoderawunicodeescape(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeRawUnicodeEscape(buf, buflen, errmode);
}
/* Test PyUnicode_AsRawUnicodeEscapeString().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsRawUnicodeEscapeString().
 */
static PyObject *
unicode_asrawunicodeescapestring(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsRawUnicodeEscapeString(text);
}
/* Test PyUnicode_DecodeLatin1().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decodelatin1(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeLatin1(buf, buflen, errmode);
}
/* Test PyUnicode_AsLatin1String().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsLatin1String().
 */
static PyObject *
unicode_aslatin1string(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsLatin1String(text);
}
/* Test PyUnicode_DecodeASCII().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decodeascii(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeASCII(buf, buflen, errmode);
}
/* Test PyUnicode_AsASCIIString().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsASCIIString().
 */
static PyObject *
unicode_asasciistring(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsASCIIString(text);
}
/* Test PyUnicode_DecodeCharmap().
 *
 * Python-level arguments: (data, mapping[, errors]); errors is NULL when
 * omitted.  mapping is run through NULLABLE() (defined elsewhere in this
 * file) before the call.
 */
static PyObject *
unicode_decodecharmap(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    PyObject *table;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#O|z", &buf, &buflen, &table, &errmode)) {
        return NULL;
    }

    NULLABLE(table);
    return PyUnicode_DecodeCharmap(buf, buflen, table, errmode);
}
/* Test PyUnicode_AsCharmapString().
 *
 * Python-level arguments: (unicode, mapping).  Both objects are run
 * through NULLABLE() (defined elsewhere in this file) before the call.
 */
static PyObject *
unicode_ascharmapstring(PyObject *self, PyObject *args)
{
    PyObject *text;
    PyObject *table;

    if (!PyArg_ParseTuple(args, "OO", &text, &table)) {
        return NULL;
    }

    NULLABLE(text);
    NULLABLE(table);
    return PyUnicode_AsCharmapString(text, table);
}
#ifdef MS_WINDOWS
/* Test PyUnicode_DecodeMBCS().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 */
static PyObject *
unicode_decodembcs(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }
    return PyUnicode_DecodeMBCS(buf, buflen, errmode);
}
/* Test PyUnicode_DecodeMBCSStateful().
 *
 * Python-level arguments: (data[, errors]); errors is NULL when omitted.
 * On success returns the tuple (decoded, consumed), where consumed is the
 * value the decoder stored through its output pointer.
 */
static PyObject *
unicode_decodembcsstateful(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    Py_ssize_t consumed;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeMBCSStateful(buf, buflen, errmode, &consumed);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(Nn)", decoded, consumed);
}
/* Test PyUnicode_DecodeCodePageStateful().
 *
 * Python-level arguments: (code_page, data[, errors]); errors is NULL when
 * omitted.  On success returns the tuple (decoded, consumed), where
 * consumed is the value the decoder stored through its output pointer.
 */
static PyObject *
unicode_decodecodepagestateful(PyObject *self, PyObject *args)
{
    int cp;
    const char *buf;
    Py_ssize_t buflen;
    const char *errmode = NULL;
    Py_ssize_t consumed;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "iy#|z", &cp, &buf, &buflen, &errmode)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeCodePageStateful(cp, buf, buflen, errmode,
                                               &consumed);
    if (decoded == NULL) {
        return NULL;
    }
    return Py_BuildValue("(Nn)", decoded, consumed);
}
/* Test PyUnicode_AsMBCSString().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_AsMBCSString().
 */
static PyObject *
unicode_asmbcsstring(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_AsMBCSString(text);
}
/* Test PyUnicode_EncodeCodePage().
 *
 * Python-level arguments: (code_page, unicode[, errors]).  unicode is run
 * through NULLABLE() (defined elsewhere in this file) before the call.
 *
 * errors comes after the "|" in the format string, so PyArg_ParseTuple()
 * does not write to it when the caller leaves it out.  It therefore has
 * to start as NULL; otherwise an indeterminate pointer would be passed to
 * PyUnicode_EncodeCodePage().
 */
static PyObject *
unicode_encodecodepage(PyObject *self, PyObject *args)
{
    int code_page;
    PyObject *unicode;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "iO|z", &code_page, &unicode, &errors)) {
        return NULL;
    }
    NULLABLE(unicode);
    return PyUnicode_EncodeCodePage(code_page, unicode, errors);
}
#endif /* MS_WINDOWS */
/* Test PyUnicode_DecodeLocaleAndSize().
 *
 * Python-level arguments: (data[, errors]).
 *
 * errors comes after the "|" in the format string, so PyArg_ParseTuple()
 * does not write to it when the caller leaves it out.  It therefore has
 * to start as NULL; otherwise an indeterminate pointer would be passed to
 * PyUnicode_DecodeLocaleAndSize().
 */
static PyObject *
unicode_decodelocaleandsize(PyObject *self, PyObject *args)
{
    const char *data;
    Py_ssize_t size;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) {
        return NULL;
    }
    return PyUnicode_DecodeLocaleAndSize(data, size, errors);
}
/* Test PyUnicode_DecodeLocale().
 *
 * Python-level arguments: (data[, errors]).
 *
 * errors comes after the "|" in the format string, so PyArg_ParseTuple()
 * does not write to it when the caller leaves it out.  It therefore has
 * to start as NULL; otherwise an indeterminate pointer would be passed to
 * PyUnicode_DecodeLocale().
 */
static PyObject *
unicode_decodelocale(PyObject *self, PyObject *args)
{
    const char *data;
    Py_ssize_t size;  /* filled in by "y#" but not passed on: the API
                         under test takes no length argument */
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) {
        return NULL;
    }
    return PyUnicode_DecodeLocale(data, errors);
}
/* Test PyUnicode_EncodeLocale().
 *
 * Python-level arguments: (unicode[, errors]).  unicode is run through
 * NULLABLE() (defined elsewhere in this file) before the call.
 *
 * errors comes after the "|" in the format string, so PyArg_ParseTuple()
 * does not write to it when the caller leaves it out.  It therefore has
 * to start as NULL; otherwise an indeterminate pointer would be passed to
 * PyUnicode_EncodeLocale().
 */
static PyObject *
unicode_encodelocale(PyObject *self, PyObject *args)
{
    PyObject *unicode;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "O|z", &unicode, &errors)) {
        return NULL;
    }
    NULLABLE(unicode);
    return PyUnicode_EncodeLocale(unicode, errors);
}
/* Test PyUnicode_DecodeFSDefault().
 *
 * Python-level argument: (data).  Only the data pointer is forwarded; the
 * length filled in by "y#" is not used because the API under test takes
 * no length argument.
 */
static PyObject *
unicode_decodefsdefault(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;

    if (!PyArg_ParseTuple(args, "y#", &buf, &buflen)) {
        return NULL;
    }
    return PyUnicode_DecodeFSDefault(buf);
}
/* Test PyUnicode_DecodeFSDefaultAndSize().
 *
 * Python-level arguments: (data[, size]).  The optional "n" argument is
 * stored into the same variable that "y#" fills with the buffer length,
 * so an explicitly supplied size replaces the real length in the call.
 */
static PyObject *
unicode_decodefsdefaultandsize(PyObject *self, PyObject *args)
{
    const char *buf;
    Py_ssize_t buflen;

    if (!PyArg_ParseTuple(args, "y#|n", &buf, &buflen, &buflen)) {
        return NULL;
    }
    return PyUnicode_DecodeFSDefaultAndSize(buf, buflen);
}
/* Test PyUnicode_EncodeFSDefault().
 *
 * Takes one object, applies NULLABLE() to it (macro defined elsewhere in
 * this file) and forwards it to PyUnicode_EncodeFSDefault().
 */
static PyObject *
unicode_encodefsdefault(PyObject *self, PyObject *arg)
{
    PyObject *text = arg;

    NULLABLE(text);
    return PyUnicode_EncodeFSDefault(text);
}
/* Test PyUnicode_Concat() */
static PyObject *
unicode_concat(PyObject *self, PyObject *args)
@ -1528,6 +1997,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_substring", unicode_substring, METH_VARARGS},
{"unicode_getlength", unicode_getlength, METH_O},
{"unicode_readchar", unicode_readchar, METH_VARARGS},
{"unicode_fromencodedobject",unicode_fromencodedobject, METH_VARARGS},
{"unicode_fromobject", unicode_fromobject, METH_O},
{"unicode_interninplace", unicode_interninplace, METH_O},
{"unicode_internfromstring", unicode_internfromstring, METH_O},
@ -1542,9 +2012,44 @@ static PyMethodDef TestMethods[] = {
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_asutf8andsize_null",unicode_asutf8andsize_null, METH_VARARGS},
{"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS},
{"unicode_decode", unicode_decode, METH_VARARGS},
{"unicode_asencodedstring", unicode_asencodedstring, METH_VARARGS},
{"unicode_buildencodingmap", unicode_buildencodingmap, METH_O},
{"unicode_decodeutf7", unicode_decodeutf7, METH_VARARGS},
{"unicode_decodeutf7stateful",unicode_decodeutf7stateful, METH_VARARGS},
{"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS},
{"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS},
{"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS},
{"unicode_asutf8string", unicode_asutf8string, METH_O},
{"unicode_decodeutf16", unicode_decodeutf16, METH_VARARGS},
{"unicode_decodeutf16stateful",unicode_decodeutf16stateful, METH_VARARGS},
{"unicode_asutf16string", unicode_asutf16string, METH_O},
{"unicode_decodeutf32", unicode_decodeutf32, METH_VARARGS},
{"unicode_decodeutf32stateful",unicode_decodeutf32stateful, METH_VARARGS},
{"unicode_asutf32string", unicode_asutf32string, METH_O},
{"unicode_decodeunicodeescape",unicode_decodeunicodeescape, METH_VARARGS},
{"unicode_asunicodeescapestring",unicode_asunicodeescapestring,METH_O},
{"unicode_decoderawunicodeescape",unicode_decoderawunicodeescape,METH_VARARGS},
{"unicode_asrawunicodeescapestring",unicode_asrawunicodeescapestring,METH_O},
{"unicode_decodelatin1", unicode_decodelatin1, METH_VARARGS},
{"unicode_aslatin1string", unicode_aslatin1string, METH_O},
{"unicode_decodeascii", unicode_decodeascii, METH_VARARGS},
{"unicode_asasciistring", unicode_asasciistring, METH_O},
{"unicode_decodecharmap", unicode_decodecharmap, METH_VARARGS},
{"unicode_ascharmapstring", unicode_ascharmapstring, METH_VARARGS},
#ifdef MS_WINDOWS
{"unicode_decodembcs", unicode_decodembcs, METH_VARARGS},
{"unicode_decodembcsstateful",unicode_decodembcsstateful, METH_VARARGS},
{"unicode_decodecodepagestateful",unicode_decodecodepagestateful,METH_VARARGS},
{"unicode_asmbcsstring", unicode_asmbcsstring, METH_O},
{"unicode_encodecodepage", unicode_encodecodepage, METH_VARARGS},
#endif /* MS_WINDOWS */
{"unicode_decodelocaleandsize",unicode_decodelocaleandsize, METH_VARARGS},
{"unicode_decodelocale", unicode_decodelocale, METH_VARARGS},
{"unicode_encodelocale", unicode_encodelocale, METH_VARARGS},
{"unicode_decodefsdefault", unicode_decodefsdefault, METH_VARARGS},
{"unicode_decodefsdefaultandsize",unicode_decodefsdefaultandsize,METH_VARARGS},
{"unicode_encodefsdefault", unicode_encodefsdefault, METH_O},
{"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O},
{"unicode_concat", unicode_concat, METH_VARARGS},
{"unicode_splitlines", unicode_splitlines, METH_VARARGS},