mirror of
https://github.com/python/cpython.git
synced 2025-11-24 20:30:18 +00:00
gh-139156: Use PyBytesWriter in UTF-16 encoder (#139233)
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the PyBytesWriter API.
This commit is contained in:
parent
e578a9e6a5
commit
c497694f77
1 changed file with 52 additions and 52 deletions
|
|
@@ -6407,32 +6407,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
const char *errors,
|
||||
int byteorder)
|
||||
{
|
||||
int kind;
|
||||
const void *data;
|
||||
Py_ssize_t len;
|
||||
PyObject *v;
|
||||
unsigned short *out;
|
||||
Py_ssize_t pairs;
|
||||
#if PY_BIG_ENDIAN
|
||||
int native_ordering = byteorder >= 0;
|
||||
#else
|
||||
int native_ordering = byteorder <= 0;
|
||||
#endif
|
||||
const char *encoding;
|
||||
Py_ssize_t nsize, pos;
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
PyObject *rep = NULL;
|
||||
|
||||
if (!PyUnicode_Check(str)) {
|
||||
PyErr_BadArgument();
|
||||
return NULL;
|
||||
}
|
||||
kind = PyUnicode_KIND(str);
|
||||
data = PyUnicode_DATA(str);
|
||||
len = PyUnicode_GET_LENGTH(str);
|
||||
int kind = PyUnicode_KIND(str);
|
||||
const void *data = PyUnicode_DATA(str);
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
|
||||
|
||||
pairs = 0;
|
||||
Py_ssize_t pairs = 0;
|
||||
if (kind == PyUnicode_4BYTE_KIND) {
|
||||
const Py_UCS4 *in = (const Py_UCS4 *)data;
|
||||
const Py_UCS4 *end = in + len;
|
||||
|
|
@@ -6445,27 +6428,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
nsize = len + pairs + (byteorder == 0);
|
||||
v = PyBytes_FromStringAndSize(NULL, nsize * 2);
|
||||
if (v == NULL) {
|
||||
Py_ssize_t nsize = len + pairs + (byteorder == 0);
|
||||
|
||||
#if PY_BIG_ENDIAN
|
||||
int native_ordering = byteorder >= 0;
|
||||
#else
|
||||
int native_ordering = byteorder <= 0;
|
||||
#endif
|
||||
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2);
|
||||
if (v == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* output buffer is 2-bytes aligned */
|
||||
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
|
||||
unsigned short *out = (unsigned short *)PyBytes_AS_STRING(v);
|
||||
if (byteorder == 0) {
|
||||
*out++ = 0xFEFF;
|
||||
}
|
||||
if (len > 0) {
|
||||
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
PyBytesWriter *writer = PyBytesWriter_Create(nsize * 2);
|
||||
if (writer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* output buffer is 2-bytes aligned */
|
||||
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
|
||||
out = (unsigned short *)PyBytes_AS_STRING(v);
|
||||
assert(_Py_IS_ALIGNED(PyBytesWriter_GetData(writer), 2));
|
||||
unsigned short *out = PyBytesWriter_GetData(writer);
|
||||
if (byteorder == 0) {
|
||||
*out++ = 0xFEFF;
|
||||
}
|
||||
if (len == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
|
||||
goto done;
|
||||
return PyBytesWriter_Finish(writer);
|
||||
}
|
||||
|
||||
const char *encoding;
|
||||
if (byteorder < 0) {
|
||||
encoding = "utf-16-le";
|
||||
}
|
||||
|
|
@@ -6476,10 +6480,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
encoding = "utf-16";
|
||||
}
|
||||
|
||||
pos = 0;
|
||||
while (pos < len) {
|
||||
Py_ssize_t newpos, repsize, moreunits;
|
||||
PyObject *errorHandler = NULL;
|
||||
PyObject *exc = NULL;
|
||||
PyObject *rep = NULL;
|
||||
|
||||
for (Py_ssize_t pos = 0; pos < len; ) {
|
||||
if (kind == PyUnicode_2BYTE_KIND) {
|
||||
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
|
||||
&out, native_ordering);
|
||||
|
|
@@ -6492,6 +6497,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
if (pos == len)
|
||||
break;
|
||||
|
||||
Py_ssize_t newpos;
|
||||
rep = unicode_encode_call_errorhandler(
|
||||
errors, &errorHandler,
|
||||
encoding, "surrogates not allowed",
|
||||
|
|
@@ -6499,6 +6505,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
if (!rep)
|
||||
goto error;
|
||||
|
||||
Py_ssize_t repsize, moreunits;
|
||||
if (PyBytes_Check(rep)) {
|
||||
repsize = PyBytes_GET_SIZE(rep);
|
||||
if (repsize & 1) {
|
||||
|
|
@@ -6524,21 +6531,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
|
||||
/* two bytes are reserved for each surrogate */
|
||||
if (moreunits > 0) {
|
||||
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
|
||||
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
|
||||
/* integer overflow */
|
||||
PyErr_NoMemory();
|
||||
out = PyBytesWriter_GrowAndUpdatePointer(writer, 2 * moreunits, out);
|
||||
if (out == NULL) {
|
||||
goto error;
|
||||
}
|
||||
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0)
|
||||
goto error;
|
||||
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
|
||||
}
|
||||
|
||||
if (PyBytes_Check(rep)) {
|
||||
memcpy(out, PyBytes_AS_STRING(rep), repsize);
|
||||
out += repsize / 2;
|
||||
} else /* rep is unicode */ {
|
||||
} else {
|
||||
/* rep is unicode */
|
||||
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
||||
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
|
||||
&out, native_ordering);
|
||||
|
|
@@ -6547,23 +6550,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
|
|||
Py_CLEAR(rep);
|
||||
}
|
||||
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
|
||||
/* Cut back to size actually needed. This is necessary for, for example,
|
||||
encoding of a string containing isolated surrogates and the 'ignore' handler
|
||||
is used. */
|
||||
nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
|
||||
if (nsize != PyBytes_GET_SIZE(v))
|
||||
_PyBytes_Resize(&v, nsize);
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
done:
|
||||
return v;
|
||||
return PyBytesWriter_FinishWithPointer(writer, out);
|
||||
|
||||
error:
|
||||
Py_XDECREF(rep);
|
||||
Py_XDECREF(errorHandler);
|
||||
Py_XDECREF(exc);
|
||||
Py_XDECREF(v);
|
||||
PyBytesWriter_Discard(writer);
|
||||
return NULL;
|
||||
#undef STORECHAR
|
||||
}
|
||||
|
||||
PyObject *
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue