Issue #25318: Add _PyBytesWriter API

Add a new private API to optimize Unicode encoders. It uses a small buffer allocated on the stack and supports overallocation. Use _PyBytesWriter API for UCS1 (ASCII and Latin1) and UTF-8 encoders. Enable overallocation for the UTF-8 encoder with error handlers. unicode_encode_ucs1(): initialize collend to collstart+1 to not check the current character twice, we already know that it is not ASCII.
2025-11-20 10:57:44 +00:00 · 2015-10-09 00:33:49 +02:00 · 2015-10-09 00:33:49 +02:00 · fdfbf78114
commit fdfbf78114
parent 08ec6d9611
3 changed files with 269 additions and 133 deletions
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@ -263,10 +263,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */

    Py_ssize_t i;                /* index into s of next input byte */
-    PyObject *result;            /* result string object */
    char *p;                     /* next free byte in output buffer */
-    Py_ssize_t nallocated;      /* number of result bytes allocated */
-    Py_ssize_t nneeded;            /* number of result bytes needed */
 #if STRINGLIB_SIZEOF_CHAR > 1
    PyObject *error_handler_obj = NULL;
    PyObject *exc = NULL;
@ -275,39 +272,25 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #endif
 #if STRINGLIB_SIZEOF_CHAR == 1
    const Py_ssize_t max_char_size = 2;
-    char stackbuf[MAX_SHORT_UNICHARS * 2];
 #elif STRINGLIB_SIZEOF_CHAR == 2
    const Py_ssize_t max_char_size = 3;
-    char stackbuf[MAX_SHORT_UNICHARS * 3];
 #else /*  STRINGLIB_SIZEOF_CHAR == 4 */
    const Py_ssize_t max_char_size = 4;
-    char stackbuf[MAX_SHORT_UNICHARS * 4];
 #endif
+    _PyBytesWriter writer;

    assert(size >= 0);
+    _PyBytesWriter_Init(&writer);

-    if (size <= MAX_SHORT_UNICHARS) {
-        /* Write into the stack buffer; nallocated can't overflow.
-         * At the end, we'll allocate exactly as much heap space as it
-         * turns out we need.
-         */
-        nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
-        result = NULL;   /* will allocate after we're done */
-        p = stackbuf;
-    }
-    else {
-        if (size > PY_SSIZE_T_MAX / max_char_size) {
-            /* integer overflow */
-            return PyErr_NoMemory();
-        }
-        /* Overallocate on the heap, and give the excess back at the end. */
-        nallocated = size * max_char_size;
-        result = PyBytes_FromStringAndSize(NULL, nallocated);
-        if (result == NULL)
-            return NULL;
-        p = PyBytes_AS_STRING(result);
+    if (size > PY_SSIZE_T_MAX / max_char_size) {
+        /* integer overflow */
+        return PyErr_NoMemory();
    }

+    p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
+    if (p == NULL)
+        return NULL;
+
    for (i = 0; i < size;) {
        Py_UCS4 ch = data[i++];

@ -338,6 +321,9 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
            while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
                endpos++;

+            /* Only overallocate the buffer if it's not the last write */
+            writer.overallocate = (endpos < size);
+
            switch (error_handler)
            {
            case _Py_ERROR_REPLACE:
@ -387,29 +373,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                    repsize = PyUnicode_GET_LENGTH(rep);

                if (repsize > max_char_size) {
-                    Py_ssize_t offset;
-
-                    if (result == NULL)
-                        offset = p - stackbuf;
-                    else
-                        offset = p - PyBytes_AS_STRING(result);
-
-                    if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size) {
-                        /* integer overflow */
-                        PyErr_NoMemory();
+                    p = _PyBytesWriter_Prepare(&writer, p,
+                                               repsize - max_char_size);
+                    if (p == NULL)
                        goto error;
-                    }
-                    nallocated += repsize - max_char_size;
-                    if (result != NULL) {
-                        if (_PyBytes_Resize(&result, nallocated) < 0)
-                            goto error;
-                    } else {
-                        result = PyBytes_FromStringAndSize(NULL, nallocated);
-                        if (result == NULL)
-                            goto error;
-                        Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset);
-                    }
-                    p = PyBytes_AS_STRING(result) + offset;
                }

                if (PyBytes_Check(rep)) {
@ -437,6 +404,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,

                i = newpos;
            }
+
+            /* If overallocation was disabled, ensure that it was the last
+               write. Otherwise, we missed an optimization */
+            assert(writer.overallocate || i == size);
        }
        else
 #if STRINGLIB_SIZEOF_CHAR > 2
@ -461,31 +432,18 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #endif /* STRINGLIB_SIZEOF_CHAR > 1 */
    }

-    if (result == NULL) {
-        /* This was stack allocated. */
-        nneeded = p - stackbuf;
-        assert(nneeded <= nallocated);
-        result = PyBytes_FromStringAndSize(stackbuf, nneeded);
-    }
-    else {
-        /* Cut back to size actually needed. */
-        nneeded = p - PyBytes_AS_STRING(result);
-        assert(nneeded <= nallocated);
-        _PyBytes_Resize(&result, nneeded);
-    }
-
 #if STRINGLIB_SIZEOF_CHAR > 1
    Py_XDECREF(error_handler_obj);
    Py_XDECREF(exc);
 #endif
-    return result;
+    return _PyBytesWriter_Finish(&writer, p);

 #if STRINGLIB_SIZEOF_CHAR > 1
 error:
    Py_XDECREF(rep);
    Py_XDECREF(error_handler_obj);
    Py_XDECREF(exc);
-    Py_XDECREF(result);
+    _PyBytesWriter_Dealloc(&writer);
    return NULL;
 #endif