Optimize backslashreplace error handler

Issue #25318: Optimize the backslashreplace and xmlcharrefreplace error handlers in the UTF-8 encoder. Also optimize the backslashreplace error handler for the ASCII and Latin1 encoders. Use the new _PyBytesWriter API to optimize these error handlers for the encoders. This avoids creating an exception and calling the slow implementation of the error handler.
2025-11-25 04:34:37 +00:00 · 2015-10-09 01:39:28 +02:00 · 2015-10-09 01:39:28 +02:00 · e7bf86cd7d
commit e7bf86cd7d
parent fdfbf78114
2 changed files with 160 additions and 51 deletions
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -334,7 +334,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                i += (endpos - startpos - 1);
                break;

-
            case _Py_ERROR_SURROGATEPASS:
                for (k=startpos; k<endpos; k++) {
                    ch = data[k];
@@ -345,6 +344,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                i += (endpos - startpos - 1);
                break;

+            case _Py_ERROR_BACKSLASHREPLACE:
+                p = backslashreplace(&writer, max_char_size, p,
+                                     unicode, startpos, endpos);
+                if (p == NULL)
+                    goto error;
+                i += (endpos - startpos - 1);
+                break;
+
+            case _Py_ERROR_XMLCHARREFREPLACE:
+                p = xmlcharrefreplace(&writer, max_char_size, p,
+                                      unicode, startpos, endpos);
+                if (p == NULL)
+                    goto error;
+                i += (endpos - startpos - 1);
+                break;
+
            case _Py_ERROR_SURROGATEESCAPE:
                for (k=startpos; k<endpos; k++) {
                    ch = data[k];
@@ -359,7 +374,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                startpos = k;
                assert(startpos < endpos);
                /* fall through the default handler */
-
            default:
                rep = unicode_encode_call_errorhandler(
                      errors, &error_handler_obj, "utf-8", "surrogates not allowed",