Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.

This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.
This commit is contained in:
Antoine Pitrou 2011-10-06 18:57:27 +02:00
parent bb2095f1e2
commit eeb7eea1f9
13 changed files with 271 additions and 87 deletions

114
Objects/accu.c Normal file
View file

@ -0,0 +1,114 @@
/* Accumulator struct implementation */
#include "Python.h"
/* Return ''.join(lst).
 *
 * `lst` is passed straight to PyUnicode_Join() together with an empty
 * separator.  Returns the joined string, or NULL if either the separator
 * could not be created or the join itself failed.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    PyObject *sep, *ret;

    sep = PyUnicode_FromStringAndSize("", 0);
    /* Py_DECREF() must not be given NULL, so stop here if the empty
       separator could not be created. */
    if (sep == NULL)
        return NULL;
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}
/* Prepare `acc` for use.
 *
 * Returns 0 on success, or -1 when the small list could not be created.
 */
int
_PyAccu_Init(_PyAccu *acc)
{
    /* The large list stays NULL until it is first needed. */
    acc->large = NULL;
    acc->small = PyList_New(0);
    return (acc->small != NULL) ? 0 : -1;
}
/* Collapse the contents of acc->small into one string and append that
 * string to acc->large, creating acc->large on first use.
 *
 * Does nothing when acc->small is empty.  Returns 0 on success and a
 * non-zero value on failure.
 */
static int
flush_accumulator(_PyAccu *acc)
{
    Py_ssize_t count = PyList_GET_SIZE(acc->small);
    PyObject *chunk;
    int status;

    if (count == 0)
        return 0;

    if (acc->large == NULL) {
        acc->large = PyList_New(0);
        if (acc->large == NULL)
            return -1;
    }

    chunk = join_list_unicode(acc->small);
    if (chunk == NULL)
        return -1;

    /* Empty the small list now that its contents live in `chunk`. */
    if (PyList_SetSlice(acc->small, 0, count, NULL) != 0) {
        Py_DECREF(chunk);
        return -1;
    }

    /* The large list holds its own reference after a successful append. */
    status = PyList_Append(acc->large, chunk);
    Py_DECREF(chunk);
    return status;
}
/* Append the string `unicode` to the accumulator.
 *
 * The string is added to the small list; once that list holds 100000
 * items it is flushed.  Returns 0 on success, non-zero on failure.
 */
int
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
{
    assert(PyUnicode_Check(unicode));

    if (PyList_Append(acc->small, unicode) != 0)
        return -1;

    /* Each item in a list of unicode objects has an overhead (in 64-bit
     * builds) of:
     *   - 8 bytes for the list slot
     *   - 56 bytes for the header of the unicode object
     * that is, 64 bytes.  100000 such objects waste more than 6MB
     * compared to a single concatenated string.
     */
    if (PyList_GET_SIZE(acc->small) >= 100000)
        return flush_accumulator(acc);
    return 0;
}
/* Finish the accumulator and hand back its contents as a list of strings.
 *
 * Pending items in the small list are flushed first and the small list is
 * released.  On success the large list is detached from `acc` and
 * returned; on failure both lists are released and NULL is returned.
 *
 * If nothing was ever accumulated the large list was never created; an
 * empty list is returned in that case, so a NULL result always means
 * that a call made here failed.
 */
PyObject *
_PyAccu_FinishAsList(_PyAccu *acc)
{
    int ret;
    PyObject *res;

    ret = flush_accumulator(acc);
    Py_CLEAR(acc->small);
    if (ret) {
        Py_CLEAR(acc->large);
        return NULL;
    }
    /* Detach the large list from the accumulator and give it to the caller. */
    res = acc->large;
    acc->large = NULL;
    if (res == NULL) {
        /* No flush ever produced a chunk: report "no pieces" as an empty
           list rather than as a NULL result with no error behind it. */
        res = PyList_New(0);
    }
    return res;
}
/* Finish the accumulator and return everything accumulated as a single
 * string, or NULL on failure.
 */
PyObject *
_PyAccu_Finish(_PyAccu *acc)
{
    PyObject *parts;
    PyObject *joined;

    if (acc->large != NULL) {
        /* Some chunks were already flushed: collect them all. */
        parts = _PyAccu_FinishAsList(acc);
        if (parts == NULL)
            return NULL;
    }
    else {
        /* Nothing was flushed yet: take the small list over directly. */
        parts = acc->small;
        acc->small = NULL;
    }

    joined = join_list_unicode(parts);
    Py_DECREF(parts);
    return joined;
}
/* Release whatever lists the accumulator still holds.
 *
 * Both members are cleared with Py_CLEAR(), so members that are already
 * NULL are left alone.
 */
void
_PyAccu_Destroy(_PyAccu *acc)
{
    Py_CLEAR(acc->small);
    Py_CLEAR(acc->large);
}

View file

@ -321,70 +321,59 @@ static PyObject *
list_repr(PyListObject *v)
{
/* Build the repr() string of the list `v`.
   NOTE(review): this span is a rendered diff hunk in which removed and
   added lines appear interleaved without +/- markers (duplicate
   declarations of `s`, two consecutive `return` statements, two cleanup
   labels), so it does not compile as shown. */
Py_ssize_t i;
PyObject *s, *temp;
PyObject *pieces = NULL, *result = NULL;
PyObject *s = NULL;
_PyAccu acc;
/* ", " separator string, created on first use and kept in a static. */
static PyObject *sep = NULL;
/* An empty list is answered directly, before the recursion guard. */
if (Py_SIZE(v) == 0) {
return PyUnicode_FromString("[]");
}
if (sep == NULL) {
sep = PyUnicode_FromString(", ");
if (sep == NULL)
return NULL;
}
/* A non-zero Py_ReprEnter() result ends the call here: a positive value
   yields "[...]", a negative one yields NULL. */
i = Py_ReprEnter((PyObject*)v);
if (i != 0) {
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
}
if (Py_SIZE(v) == 0) {
result = PyUnicode_FromString("[]");
goto Done;
}
if (_PyAccu_Init(&acc))
goto error;
pieces = PyList_New(0);
if (pieces == NULL)
goto Done;
/* The opening bracket is the first piece handed to the accumulator. */
s = PyUnicode_FromString("[");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Do repr() on each element. Note that this may mutate the list,
so must refetch the list size on each iteration. */
for (i = 0; i < Py_SIZE(v); ++i) {
int status;
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
goto Done;
goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
if (s == NULL)
goto Done;
status = PyList_Append(pieces, s);
Py_DECREF(s); /* append created a new ref */
if (status < 0)
goto Done;
/* The separator is accumulated before every element except the first. */
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
goto error;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
}
/* Add "[]" decorations to the first and last items. */
assert(PyList_GET_SIZE(pieces) > 0);
s = PyUnicode_FromString("[");
if (s == NULL)
goto Done;
temp = PyList_GET_ITEM(pieces, 0);
PyUnicode_AppendAndDel(&s, temp);
PyList_SET_ITEM(pieces, 0, s);
if (s == NULL)
goto Done;
s = PyUnicode_FromString("]");
if (s == NULL)
goto Done;
temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
PyUnicode_AppendAndDel(&temp, s);
PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
if (temp == NULL)
goto Done;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Paste them all together with ", " between. */
s = PyUnicode_FromString(", ");
if (s == NULL)
goto Done;
result = PyUnicode_Join(s, pieces);
Py_DECREF(s);
Done:
Py_XDECREF(pieces);
Py_ReprLeave((PyObject *)v);
return result;
return _PyAccu_Finish(&acc);
/* Failure path: release the accumulator and any piece still held in `s`,
   leave the repr guard, and return NULL. */
error:
_PyAccu_Destroy(&acc);
Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
return NULL;
}
static Py_ssize_t

View file

@ -240,13 +240,20 @@ static PyObject *
tuplerepr(PyTupleObject *v)
{
Py_ssize_t i, n;
PyObject *s, *temp;
PyObject *pieces, *result = NULL;
PyObject *s = NULL;
_PyAccu acc;
static PyObject *sep = NULL;
n = Py_SIZE(v);
if (n == 0)
return PyUnicode_FromString("()");
if (sep == NULL) {
sep = PyUnicode_FromString(", ");
if (sep == NULL)
return NULL;
}
/* While not mutable, it is still possible to end up with a cycle in a
tuple through an object that stores itself within a tuple (and thus
infinitely asks for the repr of itself). This should only be
@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
}
pieces = PyTuple_New(n);
if (pieces == NULL)
return NULL;
if (_PyAccu_Init(&acc))
goto error;
s = PyUnicode_FromString("(");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Do repr() on each element. */
for (i = 0; i < n; ++i) {
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
goto Done;
goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
if (s == NULL)
goto Done;
PyTuple_SET_ITEM(pieces, i, s);
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
goto error;
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
}
if (n > 1)
s = PyUnicode_FromString(")");
else
s = PyUnicode_FromString(",)");
if (s == NULL || _PyAccu_Accumulate(&acc, s))
goto error;
Py_CLEAR(s);
/* Add "()" decorations to the first and last items. */
assert(n > 0);
s = PyUnicode_FromString("(");
if (s == NULL)
goto Done;
temp = PyTuple_GET_ITEM(pieces, 0);
PyUnicode_AppendAndDel(&s, temp);
PyTuple_SET_ITEM(pieces, 0, s);
if (s == NULL)
goto Done;
s = PyUnicode_FromString(n == 1 ? ",)" : ")");
if (s == NULL)
goto Done;
temp = PyTuple_GET_ITEM(pieces, n-1);
PyUnicode_AppendAndDel(&temp, s);
PyTuple_SET_ITEM(pieces, n-1, temp);
if (temp == NULL)
goto Done;
/* Paste them all together with ", " between. */
s = PyUnicode_FromString(", ");
if (s == NULL)
goto Done;
result = PyUnicode_Join(s, pieces);
Py_DECREF(s);
Done:
Py_DECREF(pieces);
Py_ReprLeave((PyObject *)v);
return result;
return _PyAccu_Finish(&acc);
error:
_PyAccu_Destroy(&acc);
Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
return NULL;
}
/* The addend 82520, was selected from the range(0, 1000000) for