Optimize ascii/latin1+surrogateescape encoders

Issue #25227: Optimize ASCII and latin1 encoders with the ``surrogateescape``
error handler: the encoders are now up to 3 times as fast.

Initial patch written by Serhiy Storchaka.
This commit is contained in:
Victor Stinner 2015-09-29 12:32:13 +02:00
parent 5fbeabcbb6
commit c3713e9706
4 changed files with 83 additions and 0 deletions

View file

@@ -6532,6 +6532,22 @@ unicode_encode_ucs1(PyObject *unicode,
pos = collend;
break;
case _Py_ERROR_SURROGATEESCAPE:
for (i = collstart; i < collend; ++i) {
ch = PyUnicode_READ(kind, data, i);
if (ch < 0xdc80 || 0xdcff < ch) {
/* Not a UTF-8b surrogate */
break;
}
*str++ = (char)(ch - 0xdc00);
++pos;
}
if (i >= collend)
break;
collstart = pos;
assert(collstart != collend);
/* fallback to general error handling */
default:
repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj,
encoding, reason, unicode, &exc,