Optimize error handlers of ASCII and Latin1 encoders when the replacement
string is pure ASCII: use _PyBytesWriter_WriteBytes() instead of checking
each character individually.

Cleanup unicode_encode_ucs1():

* Rename repunicode to rep
* Clear rep object on error
* Factor out the code shared between the bytes and unicode paths
This commit is contained in:
Victor Stinner 2015-10-09 13:10:05 +02:00
parent ce179bf6ba
commit 6bd525b656
2 changed files with 48 additions and 44 deletions

View file

@ -311,7 +311,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
#if STRINGLIB_SIZEOF_CHAR > 1
else if (Py_UNICODE_IS_SURROGATE(ch)) {
Py_ssize_t startpos, endpos, newpos;
Py_ssize_t repsize, k;
Py_ssize_t k;
if (error_handler == _Py_ERROR_UNKNOWN)
error_handler = get_error_handler(errors);
@ -392,20 +392,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
p = _PyBytesWriter_WriteBytes(&writer, p,
PyBytes_AS_STRING(rep),
PyBytes_GET_SIZE(rep));
if (p == NULL)
goto error;
}
else {
/* rep is unicode */
if (PyUnicode_READY(rep) < 0)
goto error;
repsize = PyUnicode_GET_LENGTH(rep);
p = _PyBytesWriter_Prepare(&writer, p, repsize);
if (p == NULL)
goto error;
if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, "utf-8",
unicode,
@ -415,9 +407,13 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
}
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
memcpy(p, PyUnicode_DATA(rep), repsize);
p += repsize;
p = _PyBytesWriter_WriteBytes(&writer, p,
PyUnicode_DATA(rep),
PyUnicode_GET_LENGTH(rep));
}
if (p == NULL)
goto error;
Py_CLEAR(rep);
i = newpos;