Issue #15958: bytes.join and bytearray.join now accept arbitrary buffer objects.

This commit is contained in:
Antoine Pitrou 2012-10-16 21:07:23 +02:00
parent 6ca07a221a
commit cfc22b4a9b
5 changed files with 165 additions and 158 deletions

View file

@@ -10,9 +10,18 @@
/* Fetch a read-only buffer view of `obj` into `view`.
 *
 * Returns the buffer length in bytes on success, or -1 with a TypeError
 * set if `obj` does not support the buffer API.  On success the caller
 * owns a reference held in view->obj and must release the view.
 */
static Py_ssize_t
_getbuffer(PyObject *obj, Py_buffer *view)
{
    PyBufferProcs *bufferprocs;
    if (PyBytes_CheckExact(obj)) {
        /* Fast path, e.g. for .join() of many bytes objects:
           fill the view by hand instead of going through bf_getbuffer. */
        Py_INCREF(obj);
        view->obj = obj;
        view->buf = PyBytes_AS_STRING(obj);
        view->len = PyBytes_GET_SIZE(obj);
        return view->len;
    }
    bufferprocs = Py_TYPE(obj)->tp_as_buffer;
    if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
    {
        PyErr_Format(PyExc_TypeError,
                     "Type %.100s doesn't support the buffer API",
                     Py_TYPE(obj)->tp_name);
        return -1;
    }
    /* PyBUF_SIMPLE: contiguous, byte-oriented, read-only view. */
    if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
        return -1;
    return view->len;
}
@@ -555,6 +564,7 @@ PyBytes_AsStringAndSize(register PyObject *obj,
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/join.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h"
@@ -1107,94 +1117,9 @@ Concatenate any number of bytes objects, with B in between each pair.\n\
Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
/* bytes.join(iterable) implementation.
 *
 * Concatenates the items of `iterable` with `self` (a bytes object) as
 * the separator.  As of issue #15958 the items may be arbitrary buffer
 * objects, not just bytes/bytearray; all the heavy lifting (sizing,
 * overflow checks, copying) lives in the shared stringlib helper so the
 * same code serves both bytes.join and bytearray.join.
 *
 * Returns a new bytes object, or NULL with an exception set.
 */
static PyObject *
bytes_join(PyObject *self, PyObject *iterable)
{
    return stringlib_bytes_join(self, iterable);
}
PyObject *