mirror of
https://github.com/python/cpython.git
synced 2025-11-25 04:34:37 +00:00
Optimize backslashreplace error handler
Issue #25318: Optimize the backslashreplace and xmlcharrefreplace error handlers in the UTF-8 encoder. Also optimize the backslashreplace error handler for the ASCII and Latin1 encoders. Use the new _PyBytesWriter API to optimize these error handlers for the encoders. This avoids creating an exception and calling the slow implementation of the error handler.
This commit is contained in:
parent
fdfbf78114
commit
e7bf86cd7d
2 changed files with 160 additions and 51 deletions
|
|
@ -334,7 +334,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
|||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
|
||||
case _Py_ERROR_SURROGATEPASS:
|
||||
for (k=startpos; k<endpos; k++) {
|
||||
ch = data[k];
|
||||
|
|
@ -345,6 +344,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
|||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_BACKSLASHREPLACE:
|
||||
p = backslashreplace(&writer, max_char_size, p,
|
||||
unicode, startpos, endpos);
|
||||
if (p == NULL)
|
||||
goto error;
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_XMLCHARREFREPLACE:
|
||||
p = xmlcharrefreplace(&writer, max_char_size, p,
|
||||
unicode, startpos, endpos);
|
||||
if (p == NULL)
|
||||
goto error;
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_SURROGATEESCAPE:
|
||||
for (k=startpos; k<endpos; k++) {
|
||||
ch = data[k];
|
||||
|
|
@ -359,7 +374,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
|||
startpos = k;
|
||||
assert(startpos < endpos);
|
||||
/* fall through the default handler */
|
||||
|
||||
default:
|
||||
rep = unicode_encode_call_errorhandler(
|
||||
errors, &error_handler_obj, "utf-8", "surrogates not allowed",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue