mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
This commit is contained in:
parent
bb2095f1e2
commit
eeb7eea1f9
13 changed files with 271 additions and 87 deletions
114
Objects/accu.c
Normal file
114
Objects/accu.c
Normal file
|
@ -0,0 +1,114 @@
|
|||
/* Accumulator struct implementation */
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
/* Concatenate a list of unicode objects, i.e. return ''.join(lst).
 *
 * Returns a new reference to the joined string, or NULL with an exception
 * set if the empty separator cannot be created or the join itself fails.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    PyObject *sep, *ret;

    sep = PyUnicode_FromStringAndSize("", 0);
    /* Bail out early: a NULL separator would make PyUnicode_Join() fall
       back to its default separator, and Py_DECREF(NULL) would crash. */
    if (sep == NULL)
        return NULL;
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}
|
||||
|
||||
int
|
||||
_PyAccu_Init(_PyAccu *acc)
|
||||
{
|
||||
/* Lazily allocated */
|
||||
acc->large = NULL;
|
||||
acc->small = PyList_New(0);
|
||||
if (acc->small == NULL)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
flush_accumulator(_PyAccu *acc)
|
||||
{
|
||||
Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
|
||||
if (nsmall) {
|
||||
int ret;
|
||||
PyObject *joined;
|
||||
if (acc->large == NULL) {
|
||||
acc->large = PyList_New(0);
|
||||
if (acc->large == NULL)
|
||||
return -1;
|
||||
}
|
||||
joined = join_list_unicode(acc->small);
|
||||
if (joined == NULL)
|
||||
return -1;
|
||||
if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
|
||||
Py_DECREF(joined);
|
||||
return -1;
|
||||
}
|
||||
ret = PyList_Append(acc->large, joined);
|
||||
Py_DECREF(joined);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
|
||||
{
|
||||
Py_ssize_t nsmall;
|
||||
assert(PyUnicode_Check(unicode));
|
||||
|
||||
if (PyList_Append(acc->small, unicode))
|
||||
return -1;
|
||||
nsmall = PyList_GET_SIZE(acc->small);
|
||||
/* Each item in a list of unicode objects has an overhead (in 64-bit
|
||||
* builds) of:
|
||||
* - 8 bytes for the list slot
|
||||
* - 56 bytes for the header of the unicode object
|
||||
* that is, 64 bytes. 100000 such objects waste more than 6MB
|
||||
* compared to a single concatenated string.
|
||||
*/
|
||||
if (nsmall < 100000)
|
||||
return 0;
|
||||
return flush_accumulator(acc);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyAccu_FinishAsList(_PyAccu *acc)
|
||||
{
|
||||
int ret;
|
||||
PyObject *res;
|
||||
|
||||
ret = flush_accumulator(acc);
|
||||
Py_CLEAR(acc->small);
|
||||
if (ret) {
|
||||
Py_CLEAR(acc->large);
|
||||
return NULL;
|
||||
}
|
||||
res = acc->large;
|
||||
acc->large = NULL;
|
||||
return res;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyAccu_Finish(_PyAccu *acc)
|
||||
{
|
||||
PyObject *list, *res;
|
||||
if (acc->large == NULL) {
|
||||
list = acc->small;
|
||||
acc->small = NULL;
|
||||
}
|
||||
else {
|
||||
list = _PyAccu_FinishAsList(acc);
|
||||
if (!list)
|
||||
return NULL;
|
||||
}
|
||||
res = join_list_unicode(list);
|
||||
Py_DECREF(list);
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
_PyAccu_Destroy(_PyAccu *acc)
|
||||
{
|
||||
Py_CLEAR(acc->small);
|
||||
Py_CLEAR(acc->large);
|
||||
}
|
|
@ -321,70 +321,59 @@ static PyObject *
|
|||
list_repr(PyListObject *v)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
PyObject *s, *temp;
|
||||
PyObject *pieces = NULL, *result = NULL;
|
||||
PyObject *s = NULL;
|
||||
_PyAccu acc;
|
||||
static PyObject *sep = NULL;
|
||||
|
||||
if (Py_SIZE(v) == 0) {
|
||||
return PyUnicode_FromString("[]");
|
||||
}
|
||||
|
||||
if (sep == NULL) {
|
||||
sep = PyUnicode_FromString(", ");
|
||||
if (sep == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
i = Py_ReprEnter((PyObject*)v);
|
||||
if (i != 0) {
|
||||
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
|
||||
}
|
||||
|
||||
if (Py_SIZE(v) == 0) {
|
||||
result = PyUnicode_FromString("[]");
|
||||
goto Done;
|
||||
}
|
||||
if (_PyAccu_Init(&acc))
|
||||
goto error;
|
||||
|
||||
pieces = PyList_New(0);
|
||||
if (pieces == NULL)
|
||||
goto Done;
|
||||
s = PyUnicode_FromString("[");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Do repr() on each element. Note that this may mutate the list,
|
||||
so must refetch the list size on each iteration. */
|
||||
for (i = 0; i < Py_SIZE(v); ++i) {
|
||||
int status;
|
||||
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
|
||||
goto Done;
|
||||
goto error;
|
||||
s = PyObject_Repr(v->ob_item[i]);
|
||||
Py_LeaveRecursiveCall();
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
status = PyList_Append(pieces, s);
|
||||
Py_DECREF(s); /* append created a new ref */
|
||||
if (status < 0)
|
||||
goto Done;
|
||||
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
|
||||
goto error;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
}
|
||||
|
||||
/* Add "[]" decorations to the first and last items. */
|
||||
assert(PyList_GET_SIZE(pieces) > 0);
|
||||
s = PyUnicode_FromString("[");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyList_GET_ITEM(pieces, 0);
|
||||
PyUnicode_AppendAndDel(&s, temp);
|
||||
PyList_SET_ITEM(pieces, 0, s);
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
|
||||
s = PyUnicode_FromString("]");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
|
||||
PyUnicode_AppendAndDel(&temp, s);
|
||||
PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
|
||||
if (temp == NULL)
|
||||
goto Done;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Paste them all together with ", " between. */
|
||||
s = PyUnicode_FromString(", ");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
result = PyUnicode_Join(s, pieces);
|
||||
Py_DECREF(s);
|
||||
|
||||
Done:
|
||||
Py_XDECREF(pieces);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return result;
|
||||
return _PyAccu_Finish(&acc);
|
||||
|
||||
error:
|
||||
_PyAccu_Destroy(&acc);
|
||||
Py_XDECREF(s);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
|
|
|
@ -240,13 +240,20 @@ static PyObject *
|
|||
tuplerepr(PyTupleObject *v)
|
||||
{
|
||||
Py_ssize_t i, n;
|
||||
PyObject *s, *temp;
|
||||
PyObject *pieces, *result = NULL;
|
||||
PyObject *s = NULL;
|
||||
_PyAccu acc;
|
||||
static PyObject *sep = NULL;
|
||||
|
||||
n = Py_SIZE(v);
|
||||
if (n == 0)
|
||||
return PyUnicode_FromString("()");
|
||||
|
||||
if (sep == NULL) {
|
||||
sep = PyUnicode_FromString(", ");
|
||||
if (sep == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* While not mutable, it is still possible to end up with a cycle in a
|
||||
tuple through an object that stores itself within a tuple (and thus
|
||||
infinitely asks for the repr of itself). This should only be
|
||||
|
@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
|
|||
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
|
||||
}
|
||||
|
||||
pieces = PyTuple_New(n);
|
||||
if (pieces == NULL)
|
||||
return NULL;
|
||||
if (_PyAccu_Init(&acc))
|
||||
goto error;
|
||||
|
||||
s = PyUnicode_FromString("(");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Do repr() on each element. */
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
|
||||
goto Done;
|
||||
goto error;
|
||||
s = PyObject_Repr(v->ob_item[i]);
|
||||
Py_LeaveRecursiveCall();
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
PyTuple_SET_ITEM(pieces, i, s);
|
||||
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
|
||||
goto error;
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
}
|
||||
if (n > 1)
|
||||
s = PyUnicode_FromString(")");
|
||||
else
|
||||
s = PyUnicode_FromString(",)");
|
||||
if (s == NULL || _PyAccu_Accumulate(&acc, s))
|
||||
goto error;
|
||||
Py_CLEAR(s);
|
||||
|
||||
/* Add "()" decorations to the first and last items. */
|
||||
assert(n > 0);
|
||||
s = PyUnicode_FromString("(");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyTuple_GET_ITEM(pieces, 0);
|
||||
PyUnicode_AppendAndDel(&s, temp);
|
||||
PyTuple_SET_ITEM(pieces, 0, s);
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
|
||||
s = PyUnicode_FromString(n == 1 ? ",)" : ")");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
temp = PyTuple_GET_ITEM(pieces, n-1);
|
||||
PyUnicode_AppendAndDel(&temp, s);
|
||||
PyTuple_SET_ITEM(pieces, n-1, temp);
|
||||
if (temp == NULL)
|
||||
goto Done;
|
||||
|
||||
/* Paste them all together with ", " between. */
|
||||
s = PyUnicode_FromString(", ");
|
||||
if (s == NULL)
|
||||
goto Done;
|
||||
result = PyUnicode_Join(s, pieces);
|
||||
Py_DECREF(s);
|
||||
|
||||
Done:
|
||||
Py_DECREF(pieces);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return result;
|
||||
return _PyAccu_Finish(&acc);
|
||||
|
||||
error:
|
||||
_PyAccu_Destroy(&acc);
|
||||
Py_XDECREF(s);
|
||||
Py_ReprLeave((PyObject *)v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* The addend 82520, was selected from the range(0, 1000000) for
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue