diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst new file mode 100644 index 00000000000..e98e28802cf --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst @@ -0,0 +1,2 @@ +Speed up parsing bytes literals concatenation by using PyBytesWriter API and +a single memory allocation (about 3x faster). diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 57e46b4399c..b7a5b9d5e30 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno, Py_ssize_t len = asdl_seq_LEN(strings); assert(len > 0); - PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); - /* Bytes literals never get a kind, but just for consistency since they are represented as Constant nodes, we'll mirror the same behavior as unicode strings for determining the kind. */ - PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + + Py_ssize_t total = 0; for (Py_ssize_t i = 0; i < len; i++) { expr_ty elem = asdl_seq_GET(strings, i); - PyBytes_Concat(&res, elem->v.Constant.value); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > PY_SSIZE_T_MAX - total) { + PyErr_NoMemory(); + return NULL; + } + total += part; } - if (!res || _PyArena_AddPyObject(arena, res) < 0) { - Py_XDECREF(res); + + PyBytesWriter *writer = PyBytesWriter_Create(total); + if (writer == NULL) { + return NULL; + } + char *out = PyBytesWriter_GetData(writer); + + for (Py_ssize_t i = 0; i < len; i++) { + expr_ty elem = asdl_seq_GET(strings, i); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > 0) { + memcpy(out, PyBytes_AS_STRING(bytes), part); + out += part; + } + } + + PyObject *res = PyBytesWriter_Finish(writer); + if (res == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(arena, res) < 0) { + Py_DECREF(res); return NULL; } return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);