mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
Optimize built-in unicode codecs by avoiding unnecessary copying.
The approach used is similar to what is currently used in the version of unicodeobject.c in Python 2.x. The only difference is that we use _PyBytes_Resize instead of _PyString_Resize.
This commit is contained in:
parent
9cb6f7f7a5
commit
44531cb2db
1 changed files with 59 additions and 51 deletions
|
@ -1873,7 +1873,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
|
||||||
int encodeWhiteSpace,
|
int encodeWhiteSpace,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
PyObject *v, *result;
|
PyObject *v;
|
||||||
/* It might be possible to tighten this worst case */
|
/* It might be possible to tighten this worst case */
|
||||||
Py_ssize_t cbAllocated = 5 * size;
|
Py_ssize_t cbAllocated = 5 * size;
|
||||||
int inShift = 0;
|
int inShift = 0;
|
||||||
|
@ -1889,11 +1889,11 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
|
||||||
if (cbAllocated / 5 != size)
|
if (cbAllocated / 5 != size)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
|
|
||||||
v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
|
v = PyBytes_FromStringAndSize(NULL, cbAllocated);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
start = out = PyByteArray_AS_STRING(v);
|
start = out = PyBytes_AS_STRING(v);
|
||||||
for (;i < size; ++i) {
|
for (;i < size; ++i) {
|
||||||
Py_UNICODE ch = s[i];
|
Py_UNICODE ch = s[i];
|
||||||
|
|
||||||
|
@ -1958,10 +1958,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
|
||||||
*out++= B64(charsleft << (6-bitsleft) );
|
*out++= B64(charsleft << (6-bitsleft) );
|
||||||
*out++ = '-';
|
*out++ = '-';
|
||||||
}
|
}
|
||||||
|
if (_PyBytes_Resize(&v, out - start) < 0)
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start);
|
return NULL;
|
||||||
Py_DECREF(v);
|
return v;
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef SPECIAL
|
#undef SPECIAL
|
||||||
|
@ -2479,7 +2478,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
int byteorder)
|
int byteorder)
|
||||||
{
|
{
|
||||||
PyObject *v, *result;
|
PyObject *v;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
Py_ssize_t nsize, bytesize;
|
Py_ssize_t nsize, bytesize;
|
||||||
#ifndef Py_UNICODE_WIDE
|
#ifndef Py_UNICODE_WIDE
|
||||||
|
@ -2515,11 +2514,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
|
||||||
bytesize = nsize * 4;
|
bytesize = nsize * 4;
|
||||||
if (bytesize / 4 != nsize)
|
if (bytesize / 4 != nsize)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
v = PyByteArray_FromStringAndSize(NULL, bytesize);
|
v = PyBytes_FromStringAndSize(NULL, bytesize);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
p = (unsigned char *)PyByteArray_AS_STRING(v);
|
p = (unsigned char *)PyBytes_AS_STRING(v);
|
||||||
if (byteorder == 0)
|
if (byteorder == 0)
|
||||||
STORECHAR(0xFEFF);
|
STORECHAR(0xFEFF);
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
|
@ -2556,9 +2555,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
|
return v;
|
||||||
Py_DECREF(v);
|
|
||||||
return result;
|
|
||||||
#undef STORECHAR
|
#undef STORECHAR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2757,7 +2754,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
|
||||||
const char *errors,
|
const char *errors,
|
||||||
int byteorder)
|
int byteorder)
|
||||||
{
|
{
|
||||||
PyObject *v, *result;
|
PyObject *v;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
Py_ssize_t nsize, bytesize;
|
Py_ssize_t nsize, bytesize;
|
||||||
#ifdef Py_UNICODE_WIDE
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
@ -2792,11 +2789,11 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
|
||||||
bytesize = nsize * 2;
|
bytesize = nsize * 2;
|
||||||
if (bytesize / 2 != nsize)
|
if (bytesize / 2 != nsize)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
v = PyByteArray_FromStringAndSize(NULL, bytesize);
|
v = PyBytes_FromStringAndSize(NULL, bytesize);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
p = (unsigned char *)PyByteArray_AS_STRING(v);
|
p = (unsigned char *)PyBytes_AS_STRING(v);
|
||||||
if (byteorder == 0)
|
if (byteorder == 0)
|
||||||
STORECHAR(0xFEFF);
|
STORECHAR(0xFEFF);
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
|
@ -2828,9 +2825,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
|
return v;
|
||||||
Py_DECREF(v);
|
|
||||||
return result;
|
|
||||||
#undef STORECHAR
|
#undef STORECHAR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3120,7 +3115,7 @@ static const char *hexdigits = "0123456789abcdef";
|
||||||
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
||||||
Py_ssize_t size)
|
Py_ssize_t size)
|
||||||
{
|
{
|
||||||
PyObject *repr, *result;
|
PyObject *repr;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
@ -3147,17 +3142,20 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
||||||
escape.
|
escape.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
if (size == 0)
|
||||||
|
return PyBytes_FromStringAndSize(NULL, 0);
|
||||||
|
|
||||||
if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
|
if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
|
|
||||||
repr = PyByteArray_FromStringAndSize(NULL,
|
repr = PyBytes_FromStringAndSize(NULL,
|
||||||
2
|
2
|
||||||
+ expandsize*size
|
+ expandsize*size
|
||||||
+ 1);
|
+ 1);
|
||||||
if (repr == NULL)
|
if (repr == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
p = PyByteArray_AS_STRING(repr);
|
p = PyBytes_AS_STRING(repr);
|
||||||
|
|
||||||
while (size-- > 0) {
|
while (size-- > 0) {
|
||||||
Py_UNICODE ch = *s++;
|
Py_UNICODE ch = *s++;
|
||||||
|
@ -3249,13 +3247,13 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
|
||||||
*p++ = (char) ch;
|
*p++ = (char) ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr),
|
assert(p - PyBytes_AS_STRING(repr) > 0);
|
||||||
p - PyByteArray_AS_STRING(repr));
|
if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
|
||||||
Py_DECREF(repr);
|
return NULL;
|
||||||
return result;
|
return repr;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
|
PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
|
||||||
{
|
{
|
||||||
PyObject *s;
|
PyObject *s;
|
||||||
if (!PyUnicode_Check(unicode)) {
|
if (!PyUnicode_Check(unicode)) {
|
||||||
|
@ -3389,7 +3387,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
|
||||||
PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
Py_ssize_t size)
|
Py_ssize_t size)
|
||||||
{
|
{
|
||||||
PyObject *repr, *result;
|
PyObject *repr;
|
||||||
char *p;
|
char *p;
|
||||||
char *q;
|
char *q;
|
||||||
|
|
||||||
|
@ -3402,13 +3400,13 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
if (size > PY_SSIZE_T_MAX / expandsize)
|
if (size > PY_SSIZE_T_MAX / expandsize)
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
|
|
||||||
repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
|
repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
|
||||||
if (repr == NULL)
|
if (repr == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
goto done;
|
return repr;
|
||||||
|
|
||||||
p = q = PyByteArray_AS_STRING(repr);
|
p = q = PyBytes_AS_STRING(repr);
|
||||||
while (size-- > 0) {
|
while (size-- > 0) {
|
||||||
Py_UNICODE ch = *s++;
|
Py_UNICODE ch = *s++;
|
||||||
#ifdef Py_UNICODE_WIDE
|
#ifdef Py_UNICODE_WIDE
|
||||||
|
@ -3468,10 +3466,10 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
|
||||||
}
|
}
|
||||||
size = p - q;
|
size = p - q;
|
||||||
|
|
||||||
done:
|
assert(size > 0);
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
|
if (_PyBytes_Resize(&repr, size) < 0)
|
||||||
Py_DECREF(repr);
|
return NULL;
|
||||||
return result;
|
return repr;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
|
PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
|
||||||
|
@ -3706,7 +3704,6 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
|
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
PyObject *result = NULL;
|
|
||||||
/* the following variable is used for caching string comparisons
|
/* the following variable is used for caching string comparisons
|
||||||
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
|
||||||
int known_errorHandler = -1;
|
int known_errorHandler = -1;
|
||||||
|
@ -3715,10 +3712,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
replacements, if we need more, we'll resize */
|
replacements, if we need more, we'll resize */
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return PyBytes_FromStringAndSize(NULL, 0);
|
return PyBytes_FromStringAndSize(NULL, 0);
|
||||||
res = PyByteArray_FromStringAndSize(NULL, size);
|
res = PyBytes_FromStringAndSize(NULL, size);
|
||||||
if (res == NULL)
|
if (res == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
str = PyByteArray_AS_STRING(res);
|
str = PyBytes_AS_STRING(res);
|
||||||
ressize = size;
|
ressize = size;
|
||||||
|
|
||||||
while (p<endp) {
|
while (p<endp) {
|
||||||
|
@ -3768,7 +3765,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
p = collend;
|
p = collend;
|
||||||
break;
|
break;
|
||||||
case 4: /* xmlcharrefreplace */
|
case 4: /* xmlcharrefreplace */
|
||||||
respos = str - PyByteArray_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
/* determine replacement size (temporarily (mis)uses p) */
|
/* determine replacement size (temporarily (mis)uses p) */
|
||||||
for (p = collstart, repsize = 0; p < collend; ++p) {
|
for (p = collstart, repsize = 0; p < collend; ++p) {
|
||||||
if (*p<10)
|
if (*p<10)
|
||||||
|
@ -3795,9 +3792,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (requiredsize<2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
if (PyByteArray_Resize(res, requiredsize))
|
if (_PyBytes_Resize(&res, requiredsize))
|
||||||
goto onError;
|
goto onError;
|
||||||
str = PyByteArray_AS_STRING(res) + respos;
|
str = PyBytes_AS_STRING(res) + respos;
|
||||||
ressize = requiredsize;
|
ressize = requiredsize;
|
||||||
}
|
}
|
||||||
/* generate replacement (temporarily (mis)uses p) */
|
/* generate replacement (temporarily (mis)uses p) */
|
||||||
|
@ -3815,17 +3812,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
/* need more space? (at least enough for what we
|
/* need more space? (at least enough for what we
|
||||||
have+the replacement+the rest of the string, so
|
have+the replacement+the rest of the string, so
|
||||||
we won't have to check space for encodable characters) */
|
we won't have to check space for encodable characters) */
|
||||||
respos = str - PyByteArray_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
repsize = PyUnicode_GET_SIZE(repunicode);
|
repsize = PyUnicode_GET_SIZE(repunicode);
|
||||||
requiredsize = respos+repsize+(endp-collend);
|
requiredsize = respos+repsize+(endp-collend);
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (requiredsize<2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
if (PyByteArray_Resize(res, requiredsize)) {
|
if (_PyBytes_Resize(&res, requiredsize)) {
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
str = PyByteArray_AS_STRING(res) + respos;
|
str = PyBytes_AS_STRING(res) + respos;
|
||||||
ressize = requiredsize;
|
ressize = requiredsize;
|
||||||
}
|
}
|
||||||
/* check if there is anything unencodable in the replacement
|
/* check if there is anything unencodable in the replacement
|
||||||
|
@ -3845,13 +3842,23 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res),
|
/* Resize if we allocated to much */
|
||||||
str - PyByteArray_AS_STRING(res));
|
size = str - PyBytes_AS_STRING(res);
|
||||||
onError:
|
if (size < ressize) { /* If this falls res will be NULL */
|
||||||
Py_DECREF(res);
|
assert(size >= 0);
|
||||||
|
if (_PyBytes_Resize(&res, size) < 0)
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
|
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(errorHandler);
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
return result;
|
return res;
|
||||||
|
|
||||||
|
onError:
|
||||||
|
Py_XDECREF(res);
|
||||||
|
Py_XDECREF(errorHandler);
|
||||||
|
Py_XDECREF(exc);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
||||||
|
@ -4104,7 +4111,7 @@ static int encode_mbcs(PyObject **repr,
|
||||||
else {
|
else {
|
||||||
/* Extend string object */
|
/* Extend string object */
|
||||||
n = PyBytes_Size(*repr);
|
n = PyBytes_Size(*repr);
|
||||||
if (_PyBytes_Resize(repr, n + mbcssize) < 0)
|
if (_PyBytes_Resize(repr, n + mbcssize) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4834,7 +4841,8 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
|
|
||||||
/* Resize if we allocated to much */
|
/* Resize if we allocated to much */
|
||||||
if (respos<PyBytes_GET_SIZE(res))
|
if (respos<PyBytes_GET_SIZE(res))
|
||||||
_PyBytes_Resize(&res, respos);
|
if (_PyBytes_Resize(&res, respos) < 0)
|
||||||
|
goto onError;
|
||||||
|
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(errorHandler);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue