bpo-44032: Move data stack to thread from FrameObject. (GH-26076)

* Remove 'zombie' frames. We won't need them once we are allocating fixed-size frames.

* Add co_nlocalplus field to code object to avoid recomputing size of locals + frees + cells.

* Move locals, cells and freevars out of frame object into separate memory buffer.

* Use per-threadstate allocated memory chunks for local variables.

* Move globals and builtins from frame object to per-thread stack.

* Move (slow) locals frame object to per-thread stack.

* Move internal frame functions to internal header.
This commit is contained in:
Mark Shannon 2021-05-21 10:57:35 +01:00 committed by GitHub
parent be4dd7fcd9
commit b11a951f16
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 454 additions and 250 deletions

View file

@ -6,6 +6,7 @@
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "frameobject.h" // PyFrameObject
#include "pycore_frame.h"
#include "opcode.h" // EXTENDED_ARG
#include "structmember.h" // PyMemberDef
@ -13,9 +14,6 @@
static PyMemberDef frame_memberlist[] = {
{"f_back", T_OBJECT, OFF(f_back), READONLY},
{"f_code", T_OBJECT, OFF(f_code), READONLY|PY_AUDIT_READ},
{"f_builtins", T_OBJECT, OFF(f_builtins), READONLY},
{"f_globals", T_OBJECT, OFF(f_globals), READONLY},
{"f_trace_lines", T_BOOL, OFF(f_trace_lines), 0},
{"f_trace_opcodes", T_BOOL, OFF(f_trace_opcodes), 0},
{NULL} /* Sentinel */
@ -34,8 +32,9 @@ frame_getlocals(PyFrameObject *f, void *closure)
{
if (PyFrame_FastToLocalsWithError(f) < 0)
return NULL;
Py_INCREF(f->f_locals);
return f->f_locals;
PyObject *locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
Py_INCREF(locals);
return locals;
}
int
@ -71,6 +70,36 @@ frame_getlasti(PyFrameObject *f, void *closure)
return PyLong_FromLong(f->f_lasti*2);
}
static PyObject *
frame_getglobals(PyFrameObject *f, void *closure)
{
PyObject *globals = _PyFrame_GetGlobals(f);
if (globals == NULL) {
globals = Py_None;
}
Py_INCREF(globals);
return globals;
}
static PyObject *
frame_getbuiltins(PyFrameObject *f, void *closure)
{
PyObject *builtins = _PyFrame_GetBuiltins(f);
if (builtins == NULL) {
builtins = Py_None;
}
Py_INCREF(builtins);
return builtins;
}
static PyObject *
frame_getcode(PyFrameObject *f, void *closure)
{
if (PySys_Audit("object.__getattr__", "Os", f, "f_code") < 0) {
return NULL;
}
return (PyObject *)PyFrame_GetCode(f);
}
/* Given the index of the effective opcode,
scan back to construct the oparg with EXTENDED_ARG */
@ -554,50 +583,21 @@ static PyGetSetDef frame_getsetlist[] = {
(setter)frame_setlineno, NULL},
{"f_trace", (getter)frame_gettrace, (setter)frame_settrace, NULL},
{"f_lasti", (getter)frame_getlasti, NULL, NULL},
{"f_globals", (getter)frame_getglobals, NULL, NULL},
{"f_builtins", (getter)frame_getbuiltins, NULL, NULL},
{"f_code", (getter)frame_getcode, NULL, NULL},
{0}
};
/* Stack frames are allocated and deallocated at a considerable rate.
In an attempt to improve the speed of function calls, we:
1. Hold a single "zombie" frame on each code object. This retains
the allocated and initialised frame object from an invocation of
the code object. The zombie is reanimated the next time we need a
frame object for that code object. Doing this saves the malloc/
realloc required when using a free_list frame that isn't the
correct size. It also saves some field initialisation.
In zombie mode, no field of PyFrameObject holds a reference, but
the following fields are still valid:
* ob_type, ob_size, f_code, f_valuestack;
* f_locals, f_trace are NULL;
* f_localsplus does not require re-allocation and
the local variables in f_localsplus are NULL.
2. We also maintain a separate free list of stack frames (just like
floats are allocated in a special way -- see floatobject.c). When
a stack frame is on the free list, only the following members have
a meaning:
In an attempt to improve the speed of function calls, we maintain
a separate free list of stack frames (just like floats are
allocated in a special way -- see floatobject.c). When a stack
frame is on the free list, only the following members have a meaning:
ob_type == &Frametype
f_back next item on free list, or NULL
f_stacksize size of value stack
ob_size size of localsplus
Note that the value and block stacks are preserved -- this can save
another malloc() call or two (and two free() calls as well!).
Also note that, unlike for integers, each frame object is a
malloc'ed object in its own right -- it is only the actual calls to
malloc() that we are trying to save here, not the administration.
After all, while a typical program may make millions of calls, a
call depth of more than 20 or 30 is probably already exceptional
unless the program contains run-away recursion. I hope.
Later, PyFrame_MAXFREELIST was added to bound the # of frames saved on
free_list. Else programs creating lots of cyclic trash involving
frames could provoke free_list into growing without bound.
*/
/* max value for numfree */
#define PyFrame_MAXFREELIST 200
@ -609,42 +609,37 @@ frame_dealloc(PyFrameObject *f)
}
Py_TRASHCAN_SAFE_BEGIN(f)
/* Kill all local variables */
PyObject **valuestack = f->f_valuestack;
for (PyObject **p = f->f_localsplus; p < valuestack; p++) {
Py_CLEAR(*p);
}
PyCodeObject *co = f->f_code;
/* Free stack */
for (int i = 0; i < f->f_stackdepth; i++) {
Py_XDECREF(f->f_valuestack[i]);
/* Kill all local variables */
if (f->f_localsptr) {
for (int i = 0; i < co->co_nlocalsplus+FRAME_SPECIALS_SIZE; i++) {
Py_CLEAR(f->f_localsptr[i]);
}
/* Free items on stack */
for (int i = 0; i < f->f_stackdepth; i++) {
Py_XDECREF(f->f_valuestack[i]);
}
if (f->f_own_locals_memory) {
PyMem_Free(f->f_localsptr);
f->f_own_locals_memory = 0;
}
}
f->f_stackdepth = 0;
Py_XDECREF(f->f_back);
Py_DECREF(f->f_builtins);
Py_DECREF(f->f_globals);
Py_CLEAR(f->f_locals);
Py_CLEAR(f->f_trace);
PyCodeObject *co = f->f_code;
if (co->co_zombieframe == NULL) {
co->co_zombieframe = f;
struct _Py_frame_state *state = get_frame_state();
#ifdef Py_DEBUG
// frame_dealloc() must not be called after _PyFrame_Fini()
assert(state->numfree != -1);
#endif
if (state->numfree < PyFrame_MAXFREELIST) {
++state->numfree;
f->f_back = state->free_list;
state->free_list = f;
}
else {
struct _Py_frame_state *state = get_frame_state();
#ifdef Py_DEBUG
// frame_dealloc() must not be called after _PyFrame_Fini()
assert(state->numfree != -1);
#endif
if (state->numfree < PyFrame_MAXFREELIST) {
++state->numfree;
f->f_back = state->free_list;
state->free_list = f;
}
else {
PyObject_GC_Del(f);
}
PyObject_GC_Del(f);
}
Py_DECREF(co);
@ -654,24 +649,17 @@ frame_dealloc(PyFrameObject *f)
static inline Py_ssize_t
frame_nslots(PyFrameObject *frame)
{
PyCodeObject *code = frame->f_code;
return (code->co_nlocals
+ PyTuple_GET_SIZE(code->co_cellvars)
+ PyTuple_GET_SIZE(code->co_freevars));
return frame->f_valuestack - frame->f_localsptr;
}
static int
frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
{
Py_VISIT(f->f_back);
Py_VISIT(f->f_code);
Py_VISIT(f->f_builtins);
Py_VISIT(f->f_globals);
Py_VISIT(f->f_locals);
Py_VISIT(f->f_trace);
/* locals */
PyObject **fastlocals = f->f_localsplus;
PyObject **fastlocals = f->f_localsptr;
for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) {
Py_VISIT(*fastlocals);
}
@ -696,7 +684,7 @@ frame_tp_clear(PyFrameObject *f)
Py_CLEAR(f->f_trace);
/* locals */
PyObject **fastlocals = f->f_localsplus;
PyObject **fastlocals = f->f_localsptr;
for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) {
Py_CLEAR(*fastlocals);
}
@ -731,15 +719,12 @@ PyDoc_STRVAR(clear__doc__,
static PyObject *
frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored))
{
Py_ssize_t res, extras, ncells, nfrees;
PyCodeObject *code = f->f_code;
ncells = PyTuple_GET_SIZE(code->co_cellvars);
nfrees = PyTuple_GET_SIZE(code->co_freevars);
extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
/* subtract one as it is already included in PyFrameObject */
res = sizeof(PyFrameObject) + (extras-1) * sizeof(PyObject *);
Py_ssize_t res;
res = sizeof(PyFrameObject);
if (f->f_own_locals_memory) {
PyCodeObject *code = f->f_code;
res += (code->co_nlocalsplus+code->co_stacksize) * sizeof(PyObject *);
}
return PyLong_FromSsize_t(res);
}
@ -802,24 +787,33 @@ PyTypeObject PyFrame_Type = {
_Py_IDENTIFIER(__builtins__);
static inline PyFrameObject*
frame_alloc(PyCodeObject *code)
frame_alloc(PyCodeObject *code, PyObject **localsarray)
{
PyFrameObject *f = code->co_zombieframe;
if (f != NULL) {
code->co_zombieframe = NULL;
_Py_NewReference((PyObject *)f);
assert(f->f_code == code);
return f;
int owns;
PyFrameObject *f;
if (localsarray == NULL) {
int size = code->co_nlocalsplus+code->co_stacksize + FRAME_SPECIALS_SIZE;
localsarray = PyMem_Malloc(sizeof(PyObject *)*size);
if (localsarray == NULL) {
PyErr_NoMemory();
return NULL;
}
for (Py_ssize_t i=0; i < code->co_nlocalsplus; i++) {
localsarray[i] = NULL;
}
owns = 1;
}
else {
owns = 0;
}
Py_ssize_t ncells = PyTuple_GET_SIZE(code->co_cellvars);
Py_ssize_t nfrees = PyTuple_GET_SIZE(code->co_freevars);
Py_ssize_t extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
struct _Py_frame_state *state = get_frame_state();
if (state->free_list == NULL)
{
f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);
f = PyObject_GC_New(PyFrameObject, &PyFrame_Type);
if (f == NULL) {
if (owns) {
PyMem_Free(localsarray);
}
return NULL;
}
}
@ -832,46 +826,60 @@ frame_alloc(PyCodeObject *code)
--state->numfree;
f = state->free_list;
state->free_list = state->free_list->f_back;
if (Py_SIZE(f) < extras) {
PyFrameObject *new_f = PyObject_GC_Resize(PyFrameObject, f, extras);
if (new_f == NULL) {
PyObject_GC_Del(f);
return NULL;
}
f = new_f;
}
_Py_NewReference((PyObject *)f);
}
extras = code->co_nlocals + ncells + nfrees;
f->f_valuestack = f->f_localsplus + extras;
for (Py_ssize_t i=0; i < extras; i++) {
f->f_localsplus[i] = NULL;
}
f->f_localsptr = localsarray;
f->f_own_locals_memory = owns;
return f;
}
int
_PyFrame_TakeLocals(PyFrameObject *f)
{
assert(f->f_own_locals_memory == 0);
assert(f->f_stackdepth == 0);
int size = frame_nslots(f);
PyObject **copy = PyMem_Malloc(sizeof(PyObject *)*size);
if (copy == NULL) {
for (int i = 0; i < size; i++) {
PyObject *o = f->f_localsptr[i];
Py_XDECREF(o);
}
PyErr_NoMemory();
return -1;
}
for (int i = 0; i < size; i++) {
PyObject *o = f->f_localsptr[i];
copy[i] = o;
}
f->f_own_locals_memory = 1;
f->f_localsptr = copy;
f->f_valuestack = copy + size;
return 0;
}
PyFrameObject* _Py_HOT_FUNCTION
_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals)
_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals, PyObject **localsarray)
{
assert(con != NULL);
assert(con->fc_globals != NULL);
assert(con->fc_builtins != NULL);
assert(con->fc_code != NULL);
assert(locals == NULL || PyMapping_Check(locals));
PyCodeObject *code = (PyCodeObject *)con->fc_code;
PyFrameObject *f = frame_alloc((PyCodeObject *)con->fc_code);
PyFrameObject *f = frame_alloc(code, localsarray);
if (f == NULL) {
return NULL;
}
PyObject **specials = f->f_localsptr + code->co_nlocalsplus;
f->f_valuestack = specials + FRAME_SPECIALS_SIZE;
f->f_back = (PyFrameObject*)Py_XNewRef(tstate->frame);
f->f_code = (PyCodeObject *)Py_NewRef(con->fc_code);
f->f_builtins = Py_NewRef(con->fc_builtins);
f->f_globals = Py_NewRef(con->fc_globals);
f->f_locals = Py_XNewRef(locals);
// f_valuestack initialized by frame_alloc()
specials[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins);
specials[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals);
specials[FRAME_SPECIALS_LOCALS_OFFSET] = Py_XNewRef(locals);
f->f_trace = NULL;
f->f_stackdepth = 0;
f->f_trace_lines = 1;
@ -880,7 +888,6 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l
f->f_lasti = -1;
f->f_lineno = 0;
f->f_state = FRAME_CREATED;
// f_blockstack and f_localsplus initialized by frame_alloc()
return f;
}
@ -903,7 +910,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
.fc_kwdefaults = NULL,
.fc_closure = NULL
};
PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals);
PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals, NULL);
if (f) {
_PyObject_GC_TRACK(f);
}
@ -1022,9 +1029,9 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
PyErr_BadInternalCall();
return -1;
}
locals = f->f_locals;
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
if (locals == NULL) {
locals = f->f_locals = PyDict_New();
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET] = PyDict_New();
if (locals == NULL)
return -1;
}
@ -1036,7 +1043,7 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
Py_TYPE(map)->tp_name);
return -1;
}
fast = f->f_localsplus;
fast = f->f_localsptr;
j = PyTuple_GET_SIZE(map);
if (j > co->co_nlocals)
j = co->co_nlocals;
@ -1083,7 +1090,7 @@ PyFrame_FastToLocals(PyFrameObject *f)
void
PyFrame_LocalsToFast(PyFrameObject *f, int clear)
{
/* Merge f->f_locals into fast locals */
/* Merge locals into fast locals */
PyObject *locals, *map;
PyObject **fast;
PyObject *error_type, *error_value, *error_traceback;
@ -1092,7 +1099,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
Py_ssize_t ncells, nfreevars;
if (f == NULL)
return;
locals = f->f_locals;
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
co = f->f_code;
map = co->co_varnames;
if (locals == NULL)
@ -1100,7 +1107,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
if (!PyTuple_Check(map))
return;
PyErr_Fetch(&error_type, &error_value, &error_traceback);
fast = f->f_localsplus;
fast = f->f_localsptr;
j = PyTuple_GET_SIZE(map);
if (j > co->co_nlocals)
j = co->co_nlocals;