From 459d493ce3288cda7dcebb868970b199764502f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maurycy=20Paw=C5=82owski-Wiero=C5=84ski?= <5383+maurycy@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:24:34 +0200 Subject: [PATCH] gh-140149: Use PyBytesWriter in _build_concatenated_bytes() (#140150) Use PyBytesWriter in action_helpers.c _build_concatenated_bytes(). 3x faster bytes concat in the parser. Co-authored-by: Victor Stinner --- ...-10-15-17-12-32.gh-issue-140149.cy1m3d.rst | 2 + Parser/action_helpers.c | 39 ++++++++++++++++--- 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst new file mode 100644 index 00000000000..e98e28802cf --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst @@ -0,0 +1,2 @@ +Speed up parsing bytes literals concatenation by using PyBytesWriter API and +a single memory allocation (about 3x faster). diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 57e46b4399c..b7a5b9d5e30 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno, Py_ssize_t len = asdl_seq_LEN(strings); assert(len > 0); - PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); - /* Bytes literals never get a kind, but just for consistency since they are represented as Constant nodes, we'll mirror the same behavior as unicode strings for determining the kind. */ - PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + + Py_ssize_t total = 0; for (Py_ssize_t i = 0; i < len; i++) { expr_ty elem = asdl_seq_GET(strings, i); - PyBytes_Concat(&res, elem->v.Constant.value); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > PY_SSIZE_T_MAX - total) { + PyErr_NoMemory(); + return NULL; + } + total += part; } - if (!res || _PyArena_AddPyObject(arena, res) < 0) { - Py_XDECREF(res); + + PyBytesWriter *writer = PyBytesWriter_Create(total); + if (writer == NULL) { + return NULL; + } + char *out = PyBytesWriter_GetData(writer); + + for (Py_ssize_t i = 0; i < len; i++) { + expr_ty elem = asdl_seq_GET(strings, i); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > 0) { + memcpy(out, PyBytes_AS_STRING(bytes), part); + out += part; + } + } + + PyObject *res = PyBytesWriter_Finish(writer); + if (res == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(arena, res) < 0) { + Py_DECREF(res); return NULL; } return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);