gh-140149: Use PyBytesWriter in _build_concatenated_bytes() (#140150)

Use PyBytesWriter in action_helpers.c _build_concatenated_bytes().
3x faster bytes concat in the parser.

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Maurycy Pawłowski-Wieroński 2025-10-16 19:24:34 +02:00 committed by GitHub
parent 5f357f3b0d
commit 459d493ce3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 35 additions and 6 deletions

View file

@ -0,0 +1,2 @@
Speed up parsing of concatenated bytes literals by using the PyBytesWriter
API and a single memory allocation (about 3x faster).

View file

@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
Py_ssize_t len = asdl_seq_LEN(strings); Py_ssize_t len = asdl_seq_LEN(strings);
assert(len > 0); assert(len > 0);
PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
/* Bytes literals never get a kind, but just for consistency /* Bytes literals never get a kind, but just for consistency
since they are represented as Constant nodes, we'll mirror since they are represented as Constant nodes, we'll mirror
the same behavior as unicode strings for determining the the same behavior as unicode strings for determining the
kind. */ kind. */
PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind; PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
Py_ssize_t total = 0;
for (Py_ssize_t i = 0; i < len; i++) { for (Py_ssize_t i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i); expr_ty elem = asdl_seq_GET(strings, i);
PyBytes_Concat(&res, elem->v.Constant.value); PyObject *bytes = elem->v.Constant.value;
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
if (part > PY_SSIZE_T_MAX - total) {
PyErr_NoMemory();
return NULL;
}
total += part;
} }
if (!res || _PyArena_AddPyObject(arena, res) < 0) {
Py_XDECREF(res); PyBytesWriter *writer = PyBytesWriter_Create(total);
if (writer == NULL) {
return NULL;
}
char *out = PyBytesWriter_GetData(writer);
for (Py_ssize_t i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i);
PyObject *bytes = elem->v.Constant.value;
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
if (part > 0) {
memcpy(out, PyBytes_AS_STRING(bytes), part);
out += part;
}
}
PyObject *res = PyBytesWriter_Finish(writer);
if (res == NULL) {
return NULL;
}
if (_PyArena_AddPyObject(arena, res) < 0) {
Py_DECREF(res);
return NULL; return NULL;
} }
return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena); return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);