mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Revert "bpo-39087: Add _PyUnicode_GetUTF8Buffer()" (GH-18985)
* Revert "bpo-39087: Add _PyUnicode_GetUTF8Buffer() (GH-17659)"
This reverts commit c7ad974d34.
* Update unicodeobject.h
This commit is contained in:
parent
7a5cbc7298
commit
3a8c56295d
5 changed files with 0 additions and 284 deletions
|
@ -1967,216 +1967,6 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
|
|||
return Py_BuildValue("(Nn)", result, utf8_len);
|
||||
}
|
||||
|
||||
/* Test helper exposing _PyUnicode_GetUTF8Buffer() to Python code.
 *
 * `args` is parsed with the format "O|s": an object and an optional
 * `errors` string (left as NULL when omitted).
 *
 * Returns a new bytes object built from the contents of the buffer, or
 * NULL when argument parsing or _PyUnicode_GetUTF8Buffer() fails.  The
 * result of PyBytes_FromStringAndSize() is returned as is.
 */
static PyObject *
unicode_getutf8buffer(PyObject *self, PyObject *args)
{
    const char *errors = NULL;
    PyObject *unicode;
    Py_buffer view;
    PyObject *bytes;

    if (!PyArg_ParseTuple(args, "O|s", &unicode, &errors)) {
        return NULL;
    }
    if (_PyUnicode_GetUTF8Buffer(unicode, errors, &view) < 0) {
        return NULL;
    }

    // The exporter must be either the string itself or an exact bytes object.
    assert(view.obj != NULL);
    assert(view.obj == unicode || PyBytes_CheckExact(view.obj));

    // Copy the data out before the buffer is released.
    bytes = PyBytes_FromStringAndSize(view.buf, view.len);
    PyBuffer_Release(&view);
    return bytes;
}
|
||||
|
||||
/* Self-test for _PyUnicode_GetUTF8Buffer(); takes no arguments.
 *
 * Three cases are exercised, checking buf.obj, buf.buf, buf.len and the
 * reference count of the string where applicable:
 *   1. an ASCII string,
 *   2. a non-ASCII string,
 *   3. the same non-ASCII string after PyUnicode_AsUTF8() was called on it.
 *
 * Returns None when every check passes.  Returns NULL otherwise; failed
 * checks set TestError with the source location of the check.
 */
static PyObject *
unicode_test_getutf8buffer(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    Py_buffer buf;
    const char *cache;
    Py_ssize_t refcnt;
    PyObject *str;

    // Test 1: ASCII string
    str = PyUnicode_FromString("hello");
    if (str == NULL) {
        return NULL;
    }
    refcnt = Py_REFCNT(str);

    // _PyUnicode_GetUTF8Buffer() must not fail for ASCII string.
    // Checked explicitly (not only with assert()) so that a failure is
    // reported and `buf` is never read uninitialized.
    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) != 0) {
        if (!PyErr_Occurred()) {
            PyErr_Format(TestError,
                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
                         "without exception set. (%s:%d)",
                         __FILE__, __LINE__);
        }
        goto error;
    }

    if (buf.obj != str) {
        PyErr_Format(TestError,
                     "buf.obj must be equal to str. (%s:%d)",
                     __FILE__, __LINE__);
        goto error_release;
    }

    if (buf.len != PyUnicode_GET_LENGTH(str)) {
        PyErr_Format(TestError,
                     "buf.len must be equal to len(str). (%s:%d)",
                     __FILE__, __LINE__);
        goto error_release;
    }
    assert(((const char*)buf.buf)[5] == '\0');

    if ((Py_UCS1*)buf.buf != PyUnicode_1BYTE_DATA(str)) {
        PyErr_Format(TestError,
                     "buf.buf must be equal to PyUnicode_1BYTE_DATA(str). (%s:%d)",
                     __FILE__, __LINE__);
        goto error_release;
    }

    // While the buffer is held, str is expected to have one extra reference.
    if (refcnt + 1 != Py_REFCNT(str)) {
        PyErr_Format(TestError,
                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
                     refcnt + 1, Py_REFCNT(str),
                     __FILE__, __LINE__);
        goto error_release;
    }

    PyBuffer_Release(&buf);

    // Releasing the buffer must restore the original reference count.
    if (refcnt != Py_REFCNT(str)) {
        PyErr_Format(TestError,
                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
                     refcnt, Py_REFCNT(str),
                     __FILE__, __LINE__);
        goto error;
    }

    Py_DECREF(str);

    // Test 2: non-ASCII string

    // "hello" in Japanese. len(str)==5, len(str.encode()) == 15.
    str = PyUnicode_FromString("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf");
    if (str == NULL) {
        return NULL;
    }
    refcnt = Py_REFCNT(str);
    assert(PyUnicode_GET_LENGTH(str) == 5);

    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
        if (!PyErr_Occurred()) {
            PyErr_Format(TestError,
                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
                         "without exception set. (%s:%d)",
                         __FILE__, __LINE__);
        }
        goto error;
    }

    if (!PyBytes_CheckExact(buf.obj)) {
        PyErr_Format(TestError,
                     "buf.obj must be a bytes object, got %R (%s:%d)",
                     buf.obj, __FILE__, __LINE__);
        goto error_release;
    }

    if (buf.len != 15) {
        PyErr_Format(TestError,
                     "Expected buf.len == 15, actual %zd (%s:%d)",
                     buf.len, __FILE__, __LINE__);
        goto error_release;
    }
    assert(((const char*)buf.buf)[15] == '\0');

    if (refcnt != Py_REFCNT(str)) {
        PyErr_Format(TestError,
                     "Py_REFCNT(str) must not be changed. (%s:%d)",
                     __FILE__, __LINE__);
        // buf.obj was verified above to be a bytes object, not str, so the
        // buffer can be released without touching str.
        PyBuffer_Release(&buf);
        // Do not DECREF here because refcnt is broken.
        return NULL;
    }

    PyBuffer_Release(&buf);

    // Test 3: There is a UTF-8 cache
    // Reuse str of the previous test.

    cache = PyUnicode_AsUTF8(str);
    if (cache == NULL) {
        // str is still owned by this function; do not leak it.
        goto error;
    }

    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
        if (!PyErr_Occurred()) {
            PyErr_Format(TestError,
                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
                         "without exception set. (%s:%d)",
                         __FILE__, __LINE__);
        }
        goto error;
    }

    if (buf.obj != str) {
        PyErr_Format(TestError,
                     "buf.obj must be equal to str. (%s:%d)",
                     __FILE__, __LINE__);
        goto error_release;
    }

    if (buf.buf != cache) {
        PyErr_Format(TestError,
                     "buf.buf must be equal to the UTF-8 cache (%s:%d)",
                     __FILE__, __LINE__);
        goto error_release;
    }

    if (buf.len != 15) {
        PyErr_Format(TestError,
                     "Expected buf.len == 15, actual %zd (%s:%d)",
                     buf.len, __FILE__, __LINE__);
        goto error_release;
    }
    assert(((const char*)buf.buf)[15] == '\0');

    if (refcnt + 1 != Py_REFCNT(str)) {
        PyErr_Format(TestError,
                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
                     refcnt + 1, Py_REFCNT(str),
                     __FILE__, __LINE__);
        // Do not DECREF here because refcnt is broken.
        // The buffer is not released either: buf.obj is str here.
        return NULL;
    }

    PyBuffer_Release(&buf);

    if (refcnt != Py_REFCNT(str)) {
        PyErr_Format(TestError,
                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
                     refcnt, Py_REFCNT(str),
                     __FILE__, __LINE__);
        // Do not DECREF here because refcnt is broken.
        return NULL;
    }

    Py_DECREF(str);
    Py_RETURN_NONE;

error_release:
    // A buffer is held: release it first, then drop our reference to str.
    PyBuffer_Release(&buf);
error:
    Py_DECREF(str);
    return NULL;
}
|
||||
|
||||
static PyObject *
|
||||
unicode_findchar(PyObject *self, PyObject *args)
|
||||
{
|
||||
|
@ -5602,8 +5392,6 @@ static PyMethodDef TestMethods[] = {
|
|||
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
|
||||
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
||||
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
|
||||
{"unicode_getutf8buffer", unicode_getutf8buffer, METH_VARARGS},
|
||||
{"unicode_test_getutf8buffer", unicode_test_getutf8buffer, METH_NOARGS},
|
||||
{"unicode_findchar", unicode_findchar, METH_VARARGS},
|
||||
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
||||
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue