mirror of
https://github.com/python/cpython.git
synced 2025-08-22 17:55:18 +00:00
bpo-44032: Move data stack to thread from FrameObject. (GH-26076)
* Remove 'zombie' frames. We won't need them once we are allocating fixed-size frames. * Add co_nlocalplus field to code object to avoid recomputing size of locals + frees + cells. * Move locals, cells and freevars out of frame object into separate memory buffer. * Use per-threadstate allocated memory chunks for local variables. * Move globals and builtins from frame object to per-thread stack. * Move (slow) locals frame object to per-thread stack. * Move internal frame functions to internal header.
This commit is contained in:
parent
be4dd7fcd9
commit
b11a951f16
20 changed files with 454 additions and 250 deletions
|
@ -6,6 +6,7 @@
|
|||
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
|
||||
|
||||
#include "frameobject.h" // PyFrameObject
|
||||
#include "pycore_frame.h"
|
||||
#include "opcode.h" // EXTENDED_ARG
|
||||
#include "structmember.h" // PyMemberDef
|
||||
|
||||
|
@ -13,9 +14,6 @@
|
|||
|
||||
static PyMemberDef frame_memberlist[] = {
|
||||
{"f_back", T_OBJECT, OFF(f_back), READONLY},
|
||||
{"f_code", T_OBJECT, OFF(f_code), READONLY|PY_AUDIT_READ},
|
||||
{"f_builtins", T_OBJECT, OFF(f_builtins), READONLY},
|
||||
{"f_globals", T_OBJECT, OFF(f_globals), READONLY},
|
||||
{"f_trace_lines", T_BOOL, OFF(f_trace_lines), 0},
|
||||
{"f_trace_opcodes", T_BOOL, OFF(f_trace_opcodes), 0},
|
||||
{NULL} /* Sentinel */
|
||||
|
@ -34,8 +32,9 @@ frame_getlocals(PyFrameObject *f, void *closure)
|
|||
{
|
||||
if (PyFrame_FastToLocalsWithError(f) < 0)
|
||||
return NULL;
|
||||
Py_INCREF(f->f_locals);
|
||||
return f->f_locals;
|
||||
PyObject *locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
|
||||
Py_INCREF(locals);
|
||||
return locals;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -71,6 +70,36 @@ frame_getlasti(PyFrameObject *f, void *closure)
|
|||
return PyLong_FromLong(f->f_lasti*2);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
frame_getglobals(PyFrameObject *f, void *closure)
|
||||
{
|
||||
PyObject *globals = _PyFrame_GetGlobals(f);
|
||||
if (globals == NULL) {
|
||||
globals = Py_None;
|
||||
}
|
||||
Py_INCREF(globals);
|
||||
return globals;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
frame_getbuiltins(PyFrameObject *f, void *closure)
|
||||
{
|
||||
PyObject *builtins = _PyFrame_GetBuiltins(f);
|
||||
if (builtins == NULL) {
|
||||
builtins = Py_None;
|
||||
}
|
||||
Py_INCREF(builtins);
|
||||
return builtins;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
frame_getcode(PyFrameObject *f, void *closure)
|
||||
{
|
||||
if (PySys_Audit("object.__getattr__", "Os", f, "f_code") < 0) {
|
||||
return NULL;
|
||||
}
|
||||
return (PyObject *)PyFrame_GetCode(f);
|
||||
}
|
||||
|
||||
/* Given the index of the effective opcode,
|
||||
scan back to construct the oparg with EXTENDED_ARG */
|
||||
|
@ -554,50 +583,21 @@ static PyGetSetDef frame_getsetlist[] = {
|
|||
(setter)frame_setlineno, NULL},
|
||||
{"f_trace", (getter)frame_gettrace, (setter)frame_settrace, NULL},
|
||||
{"f_lasti", (getter)frame_getlasti, NULL, NULL},
|
||||
{"f_globals", (getter)frame_getglobals, NULL, NULL},
|
||||
{"f_builtins", (getter)frame_getbuiltins, NULL, NULL},
|
||||
{"f_code", (getter)frame_getcode, NULL, NULL},
|
||||
{0}
|
||||
};
|
||||
|
||||
/* Stack frames are allocated and deallocated at a considerable rate.
|
||||
In an attempt to improve the speed of function calls, we:
|
||||
|
||||
1. Hold a single "zombie" frame on each code object. This retains
|
||||
the allocated and initialised frame object from an invocation of
|
||||
the code object. The zombie is reanimated the next time we need a
|
||||
frame object for that code object. Doing this saves the malloc/
|
||||
realloc required when using a free_list frame that isn't the
|
||||
correct size. It also saves some field initialisation.
|
||||
|
||||
In zombie mode, no field of PyFrameObject holds a reference, but
|
||||
the following fields are still valid:
|
||||
|
||||
* ob_type, ob_size, f_code, f_valuestack;
|
||||
|
||||
* f_locals, f_trace are NULL;
|
||||
|
||||
* f_localsplus does not require re-allocation and
|
||||
the local variables in f_localsplus are NULL.
|
||||
|
||||
2. We also maintain a separate free list of stack frames (just like
|
||||
floats are allocated in a special way -- see floatobject.c). When
|
||||
a stack frame is on the free list, only the following members have
|
||||
a meaning:
|
||||
In an attempt to improve the speed of function calls, we maintain
|
||||
a separate free list of stack frames (just like floats are
|
||||
allocated in a special way -- see floatobject.c). When a stack
|
||||
frame is on the free list, only the following members have a meaning:
|
||||
ob_type == &Frametype
|
||||
f_back next item on free list, or NULL
|
||||
f_stacksize size of value stack
|
||||
ob_size size of localsplus
|
||||
Note that the value and block stacks are preserved -- this can save
|
||||
another malloc() call or two (and two free() calls as well!).
|
||||
Also note that, unlike for integers, each frame object is a
|
||||
malloc'ed object in its own right -- it is only the actual calls to
|
||||
malloc() that we are trying to save here, not the administration.
|
||||
After all, while a typical program may make millions of calls, a
|
||||
call depth of more than 20 or 30 is probably already exceptional
|
||||
unless the program contains run-away recursion. I hope.
|
||||
|
||||
Later, PyFrame_MAXFREELIST was added to bound the # of frames saved on
|
||||
free_list. Else programs creating lots of cyclic trash involving
|
||||
frames could provoke free_list into growing without bound.
|
||||
*/
|
||||
|
||||
/* max value for numfree */
|
||||
#define PyFrame_MAXFREELIST 200
|
||||
|
||||
|
@ -609,42 +609,37 @@ frame_dealloc(PyFrameObject *f)
|
|||
}
|
||||
|
||||
Py_TRASHCAN_SAFE_BEGIN(f)
|
||||
/* Kill all local variables */
|
||||
PyObject **valuestack = f->f_valuestack;
|
||||
for (PyObject **p = f->f_localsplus; p < valuestack; p++) {
|
||||
Py_CLEAR(*p);
|
||||
}
|
||||
PyCodeObject *co = f->f_code;
|
||||
|
||||
/* Free stack */
|
||||
for (int i = 0; i < f->f_stackdepth; i++) {
|
||||
Py_XDECREF(f->f_valuestack[i]);
|
||||
/* Kill all local variables */
|
||||
if (f->f_localsptr) {
|
||||
for (int i = 0; i < co->co_nlocalsplus+FRAME_SPECIALS_SIZE; i++) {
|
||||
Py_CLEAR(f->f_localsptr[i]);
|
||||
}
|
||||
/* Free items on stack */
|
||||
for (int i = 0; i < f->f_stackdepth; i++) {
|
||||
Py_XDECREF(f->f_valuestack[i]);
|
||||
}
|
||||
if (f->f_own_locals_memory) {
|
||||
PyMem_Free(f->f_localsptr);
|
||||
f->f_own_locals_memory = 0;
|
||||
}
|
||||
}
|
||||
f->f_stackdepth = 0;
|
||||
|
||||
Py_XDECREF(f->f_back);
|
||||
Py_DECREF(f->f_builtins);
|
||||
Py_DECREF(f->f_globals);
|
||||
Py_CLEAR(f->f_locals);
|
||||
Py_CLEAR(f->f_trace);
|
||||
|
||||
PyCodeObject *co = f->f_code;
|
||||
if (co->co_zombieframe == NULL) {
|
||||
co->co_zombieframe = f;
|
||||
struct _Py_frame_state *state = get_frame_state();
|
||||
#ifdef Py_DEBUG
|
||||
// frame_dealloc() must not be called after _PyFrame_Fini()
|
||||
assert(state->numfree != -1);
|
||||
#endif
|
||||
if (state->numfree < PyFrame_MAXFREELIST) {
|
||||
++state->numfree;
|
||||
f->f_back = state->free_list;
|
||||
state->free_list = f;
|
||||
}
|
||||
else {
|
||||
struct _Py_frame_state *state = get_frame_state();
|
||||
#ifdef Py_DEBUG
|
||||
// frame_dealloc() must not be called after _PyFrame_Fini()
|
||||
assert(state->numfree != -1);
|
||||
#endif
|
||||
if (state->numfree < PyFrame_MAXFREELIST) {
|
||||
++state->numfree;
|
||||
f->f_back = state->free_list;
|
||||
state->free_list = f;
|
||||
}
|
||||
else {
|
||||
PyObject_GC_Del(f);
|
||||
}
|
||||
PyObject_GC_Del(f);
|
||||
}
|
||||
|
||||
Py_DECREF(co);
|
||||
|
@ -654,24 +649,17 @@ frame_dealloc(PyFrameObject *f)
|
|||
static inline Py_ssize_t
|
||||
frame_nslots(PyFrameObject *frame)
|
||||
{
|
||||
PyCodeObject *code = frame->f_code;
|
||||
return (code->co_nlocals
|
||||
+ PyTuple_GET_SIZE(code->co_cellvars)
|
||||
+ PyTuple_GET_SIZE(code->co_freevars));
|
||||
return frame->f_valuestack - frame->f_localsptr;
|
||||
}
|
||||
|
||||
static int
|
||||
frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
|
||||
{
|
||||
Py_VISIT(f->f_back);
|
||||
Py_VISIT(f->f_code);
|
||||
Py_VISIT(f->f_builtins);
|
||||
Py_VISIT(f->f_globals);
|
||||
Py_VISIT(f->f_locals);
|
||||
Py_VISIT(f->f_trace);
|
||||
|
||||
/* locals */
|
||||
PyObject **fastlocals = f->f_localsplus;
|
||||
PyObject **fastlocals = f->f_localsptr;
|
||||
for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) {
|
||||
Py_VISIT(*fastlocals);
|
||||
}
|
||||
|
@ -696,7 +684,7 @@ frame_tp_clear(PyFrameObject *f)
|
|||
Py_CLEAR(f->f_trace);
|
||||
|
||||
/* locals */
|
||||
PyObject **fastlocals = f->f_localsplus;
|
||||
PyObject **fastlocals = f->f_localsptr;
|
||||
for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) {
|
||||
Py_CLEAR(*fastlocals);
|
||||
}
|
||||
|
@ -731,15 +719,12 @@ PyDoc_STRVAR(clear__doc__,
|
|||
static PyObject *
|
||||
frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
Py_ssize_t res, extras, ncells, nfrees;
|
||||
|
||||
PyCodeObject *code = f->f_code;
|
||||
ncells = PyTuple_GET_SIZE(code->co_cellvars);
|
||||
nfrees = PyTuple_GET_SIZE(code->co_freevars);
|
||||
extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
|
||||
/* subtract one as it is already included in PyFrameObject */
|
||||
res = sizeof(PyFrameObject) + (extras-1) * sizeof(PyObject *);
|
||||
|
||||
Py_ssize_t res;
|
||||
res = sizeof(PyFrameObject);
|
||||
if (f->f_own_locals_memory) {
|
||||
PyCodeObject *code = f->f_code;
|
||||
res += (code->co_nlocalsplus+code->co_stacksize) * sizeof(PyObject *);
|
||||
}
|
||||
return PyLong_FromSsize_t(res);
|
||||
}
|
||||
|
||||
|
@ -802,24 +787,33 @@ PyTypeObject PyFrame_Type = {
|
|||
_Py_IDENTIFIER(__builtins__);
|
||||
|
||||
static inline PyFrameObject*
|
||||
frame_alloc(PyCodeObject *code)
|
||||
frame_alloc(PyCodeObject *code, PyObject **localsarray)
|
||||
{
|
||||
PyFrameObject *f = code->co_zombieframe;
|
||||
if (f != NULL) {
|
||||
code->co_zombieframe = NULL;
|
||||
_Py_NewReference((PyObject *)f);
|
||||
assert(f->f_code == code);
|
||||
return f;
|
||||
int owns;
|
||||
PyFrameObject *f;
|
||||
if (localsarray == NULL) {
|
||||
int size = code->co_nlocalsplus+code->co_stacksize + FRAME_SPECIALS_SIZE;
|
||||
localsarray = PyMem_Malloc(sizeof(PyObject *)*size);
|
||||
if (localsarray == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
for (Py_ssize_t i=0; i < code->co_nlocalsplus; i++) {
|
||||
localsarray[i] = NULL;
|
||||
}
|
||||
owns = 1;
|
||||
}
|
||||
else {
|
||||
owns = 0;
|
||||
}
|
||||
|
||||
Py_ssize_t ncells = PyTuple_GET_SIZE(code->co_cellvars);
|
||||
Py_ssize_t nfrees = PyTuple_GET_SIZE(code->co_freevars);
|
||||
Py_ssize_t extras = code->co_stacksize + code->co_nlocals + ncells + nfrees;
|
||||
struct _Py_frame_state *state = get_frame_state();
|
||||
if (state->free_list == NULL)
|
||||
{
|
||||
f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);
|
||||
f = PyObject_GC_New(PyFrameObject, &PyFrame_Type);
|
||||
if (f == NULL) {
|
||||
if (owns) {
|
||||
PyMem_Free(localsarray);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -832,46 +826,60 @@ frame_alloc(PyCodeObject *code)
|
|||
--state->numfree;
|
||||
f = state->free_list;
|
||||
state->free_list = state->free_list->f_back;
|
||||
if (Py_SIZE(f) < extras) {
|
||||
PyFrameObject *new_f = PyObject_GC_Resize(PyFrameObject, f, extras);
|
||||
if (new_f == NULL) {
|
||||
PyObject_GC_Del(f);
|
||||
return NULL;
|
||||
}
|
||||
f = new_f;
|
||||
}
|
||||
_Py_NewReference((PyObject *)f);
|
||||
}
|
||||
|
||||
extras = code->co_nlocals + ncells + nfrees;
|
||||
f->f_valuestack = f->f_localsplus + extras;
|
||||
for (Py_ssize_t i=0; i < extras; i++) {
|
||||
f->f_localsplus[i] = NULL;
|
||||
}
|
||||
f->f_localsptr = localsarray;
|
||||
f->f_own_locals_memory = owns;
|
||||
return f;
|
||||
}
|
||||
|
||||
int
|
||||
_PyFrame_TakeLocals(PyFrameObject *f)
|
||||
{
|
||||
assert(f->f_own_locals_memory == 0);
|
||||
assert(f->f_stackdepth == 0);
|
||||
int size = frame_nslots(f);
|
||||
PyObject **copy = PyMem_Malloc(sizeof(PyObject *)*size);
|
||||
if (copy == NULL) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
PyObject *o = f->f_localsptr[i];
|
||||
Py_XDECREF(o);
|
||||
}
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
for (int i = 0; i < size; i++) {
|
||||
PyObject *o = f->f_localsptr[i];
|
||||
copy[i] = o;
|
||||
}
|
||||
f->f_own_locals_memory = 1;
|
||||
f->f_localsptr = copy;
|
||||
f->f_valuestack = copy + size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyFrameObject* _Py_HOT_FUNCTION
|
||||
_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals)
|
||||
_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals, PyObject **localsarray)
|
||||
{
|
||||
assert(con != NULL);
|
||||
assert(con->fc_globals != NULL);
|
||||
assert(con->fc_builtins != NULL);
|
||||
assert(con->fc_code != NULL);
|
||||
assert(locals == NULL || PyMapping_Check(locals));
|
||||
PyCodeObject *code = (PyCodeObject *)con->fc_code;
|
||||
|
||||
PyFrameObject *f = frame_alloc((PyCodeObject *)con->fc_code);
|
||||
PyFrameObject *f = frame_alloc(code, localsarray);
|
||||
if (f == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject **specials = f->f_localsptr + code->co_nlocalsplus;
|
||||
f->f_valuestack = specials + FRAME_SPECIALS_SIZE;
|
||||
f->f_back = (PyFrameObject*)Py_XNewRef(tstate->frame);
|
||||
f->f_code = (PyCodeObject *)Py_NewRef(con->fc_code);
|
||||
f->f_builtins = Py_NewRef(con->fc_builtins);
|
||||
f->f_globals = Py_NewRef(con->fc_globals);
|
||||
f->f_locals = Py_XNewRef(locals);
|
||||
// f_valuestack initialized by frame_alloc()
|
||||
specials[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins);
|
||||
specials[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals);
|
||||
specials[FRAME_SPECIALS_LOCALS_OFFSET] = Py_XNewRef(locals);
|
||||
f->f_trace = NULL;
|
||||
f->f_stackdepth = 0;
|
||||
f->f_trace_lines = 1;
|
||||
|
@ -880,7 +888,6 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l
|
|||
f->f_lasti = -1;
|
||||
f->f_lineno = 0;
|
||||
f->f_state = FRAME_CREATED;
|
||||
// f_blockstack and f_localsplus initialized by frame_alloc()
|
||||
return f;
|
||||
}
|
||||
|
||||
|
@ -903,7 +910,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
|
|||
.fc_kwdefaults = NULL,
|
||||
.fc_closure = NULL
|
||||
};
|
||||
PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals);
|
||||
PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals, NULL);
|
||||
if (f) {
|
||||
_PyObject_GC_TRACK(f);
|
||||
}
|
||||
|
@ -1022,9 +1029,9 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
|
|||
PyErr_BadInternalCall();
|
||||
return -1;
|
||||
}
|
||||
locals = f->f_locals;
|
||||
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
|
||||
if (locals == NULL) {
|
||||
locals = f->f_locals = PyDict_New();
|
||||
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET] = PyDict_New();
|
||||
if (locals == NULL)
|
||||
return -1;
|
||||
}
|
||||
|
@ -1036,7 +1043,7 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
|
|||
Py_TYPE(map)->tp_name);
|
||||
return -1;
|
||||
}
|
||||
fast = f->f_localsplus;
|
||||
fast = f->f_localsptr;
|
||||
j = PyTuple_GET_SIZE(map);
|
||||
if (j > co->co_nlocals)
|
||||
j = co->co_nlocals;
|
||||
|
@ -1083,7 +1090,7 @@ PyFrame_FastToLocals(PyFrameObject *f)
|
|||
void
|
||||
PyFrame_LocalsToFast(PyFrameObject *f, int clear)
|
||||
{
|
||||
/* Merge f->f_locals into fast locals */
|
||||
/* Merge locals into fast locals */
|
||||
PyObject *locals, *map;
|
||||
PyObject **fast;
|
||||
PyObject *error_type, *error_value, *error_traceback;
|
||||
|
@ -1092,7 +1099,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
|
|||
Py_ssize_t ncells, nfreevars;
|
||||
if (f == NULL)
|
||||
return;
|
||||
locals = f->f_locals;
|
||||
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
|
||||
co = f->f_code;
|
||||
map = co->co_varnames;
|
||||
if (locals == NULL)
|
||||
|
@ -1100,7 +1107,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
|
|||
if (!PyTuple_Check(map))
|
||||
return;
|
||||
PyErr_Fetch(&error_type, &error_value, &error_traceback);
|
||||
fast = f->f_localsplus;
|
||||
fast = f->f_localsptr;
|
||||
j = PyTuple_GET_SIZE(map);
|
||||
if (j > co->co_nlocals)
|
||||
j = co->co_nlocals;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue