bpo-43693: Eliminate unused "fast locals". (gh-26587)

Currently, if an arg value escapes (into the closure of an inner function), we end up allocating two indices in the fast locals even though only one gets used. Additionally, using the lower index would be better in some cases, such as with no-arg `super()`. To address this, we update the compiler to fix the offsets so that each variable gets only one "fast local". As a consequence, some cell offsets are now interspersed with the locals (only when an arg escapes to an inner function).

https://bugs.python.org/issue43693
This commit is contained in:
Eric Snow 2021-06-15 16:35:25 -06:00 committed by GitHub
parent 1d10bf0bb9
commit ac38a9f2df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 4189 additions and 4214 deletions

View file

@ -162,43 +162,28 @@ _Py_set_localsplus_info(int offset, PyObject *name, _PyLocalsPlusKind kind,
Py_INCREF(name);
PyTuple_SET_ITEM(names, offset, name);
kinds[offset] = kind;
if (kind == CO_FAST_CELL) {
// Cells can overlap with args, so mark those cases.
int nlocalsplus = (int)PyTuple_GET_SIZE(names);
for (int i = 0; i < nlocalsplus; i++) {
_PyLocalsPlusKind kind = kinds[i];
if (kind && !(kind & CO_FAST_LOCAL)) {
// We've moved past the locals.
break;
}
PyObject *varname = PyTuple_GET_ITEM(names, i);
int cmp = PyUnicode_Compare(name, varname);
if (cmp == 0) {
kinds[i] |= CO_FAST_CELL;
break;
}
assert(cmp > 0 || !PyErr_Occurred());
}
}
}
static void
get_localsplus_counts(PyObject *names, _PyLocalsPlusKinds kinds,
int *pnlocals, int *pncellvars,
int *pnlocals, int *pnplaincellvars, int *pncellvars,
int *pnfreevars)
{
int nlocals = 0;
int nplaincellvars = 0;
int ncellvars = 0;
int nfreevars = 0;
int nlocalsplus = Py_SAFE_DOWNCAST(PyTuple_GET_SIZE(names),
Py_ssize_t, int);
Py_ssize_t nlocalsplus = PyTuple_GET_SIZE(names);
for (int i = 0; i < nlocalsplus; i++) {
if (kinds[i] & CO_FAST_LOCAL) {
nlocals += 1;
if (kinds[i] & CO_FAST_CELL) {
ncellvars += 1;
}
}
else if (kinds[i] & CO_FAST_CELL) {
ncellvars += 1;
nplaincellvars += 1;
}
else if (kinds[i] & CO_FAST_FREE) {
nfreevars += 1;
@ -207,6 +192,9 @@ get_localsplus_counts(PyObject *names, _PyLocalsPlusKinds kinds,
if (pnlocals != NULL) {
*pnlocals = nlocals;
}
if (pnplaincellvars != NULL) {
*pnplaincellvars = nplaincellvars;
}
if (pncellvars != NULL) {
*pncellvars = ncellvars;
}
@ -227,10 +215,6 @@ get_localsplus_names(PyCodeObject *co, _PyLocalsPlusKind kind, int num)
if ((co->co_localspluskinds[offset] & kind) == 0) {
continue;
}
// For now there may be duplicates, which we ignore.
if (kind == CO_FAST_CELL && co->co_localspluskinds[offset] != kind) {
continue;
}
assert(index < num);
PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, offset);
Py_INCREF(name);
@ -283,7 +267,7 @@ _PyCode_Validate(struct _PyCodeConstructor *con)
* here to avoid the possibility of overflow (however remote). */
int nlocals;
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
&nlocals, NULL, NULL);
&nlocals, NULL, NULL, NULL);
int nplainlocals = nlocals -
con->argcount -
con->kwonlyargcount -
@ -301,9 +285,9 @@ static void
init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
{
int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames);
int nlocals, ncellvars, nfreevars;
int nlocals, nplaincellvars, ncellvars, nfreevars;
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
&nlocals, &ncellvars, &nfreevars);
&nlocals, &nplaincellvars, &ncellvars, &nfreevars);
Py_INCREF(con->filename);
co->co_filename = con->filename;
@ -338,9 +322,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
co->co_exceptiontable = con->exceptiontable;
/* derived values */
co->co_cell2arg = NULL; // This will be set soon.
co->co_nlocalsplus = nlocalsplus;
co->co_nlocals = nlocals;
co->co_nplaincellvars = nplaincellvars;
co->co_ncellvars = ncellvars;
co->co_nfreevars = nfreevars;
co->co_varnames = NULL;
@ -392,44 +376,6 @@ _PyCode_New(struct _PyCodeConstructor *con)
co->co_flags &= ~CO_NOFREE;
}
/* Create mapping between cells and arguments if needed. */
if (co->co_ncellvars) {
int totalargs = co->co_argcount +
co->co_kwonlyargcount +
((co->co_flags & CO_VARARGS) != 0) +
((co->co_flags & CO_VARKEYWORDS) != 0);
assert(totalargs <= co->co_nlocals);
/* Find cells which are also arguments. */
for (int i = 0; i < co->co_ncellvars; i++) {
PyObject *cellname = PyTuple_GET_ITEM(co->co_localsplusnames,
i + co->co_nlocals);
for (int j = 0; j < totalargs; j++) {
PyObject *argname = PyTuple_GET_ITEM(co->co_localsplusnames, j);
int cmp = PyUnicode_Compare(cellname, argname);
if (cmp == -1 && PyErr_Occurred()) {
Py_DECREF(co);
return NULL;
}
if (cmp == 0) {
if (co->co_cell2arg == NULL) {
co->co_cell2arg = PyMem_NEW(int, co->co_ncellvars);
if (co->co_cell2arg == NULL) {
Py_DECREF(co);
PyErr_NoMemory();
return NULL;
}
for (int k = 0; k < co->co_ncellvars; k++) {
co->co_cell2arg[k] = CO_CELL_NOT_AN_ARG;
}
}
co->co_cell2arg[i] = j;
// Go to the next cell name.
break;
}
}
}
}
return co;
}
@ -478,6 +424,23 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
}
for (int i = 0; i < ncellvars; i++, offset++) {
PyObject *name = PyTuple_GET_ITEM(cellvars, i);
int argoffset = -1;
for (int j = 0; j < nvarnames; j++) {
int cmp = PyUnicode_Compare(PyTuple_GET_ITEM(varnames, j),
name);
assert(!PyErr_Occurred());
if (cmp == 0) {
argoffset = j;
break;
}
}
if (argoffset >= 0) {
// Merge the localsplus indices.
nlocalsplus -= 1;
offset -= 1;
localspluskinds[argoffset] |= CO_FAST_CELL;
continue;
}
_Py_set_localsplus_info(offset, name, CO_FAST_CELL,
localsplusnames, localspluskinds);
}
@ -486,6 +449,11 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
_Py_set_localsplus_info(offset, name, CO_FAST_FREE,
localsplusnames, localspluskinds);
}
// If any cells were args then nlocalsplus will have shrunk.
// We don't bother resizing localspluskinds.
if (_PyTuple_Resize(&localsplusnames, nlocalsplus) < 0) {
goto error;
}
struct _PyCodeConstructor con = {
.filename = filename,
@ -1182,8 +1150,6 @@ code_dealloc(PyCodeObject *co)
Py_XDECREF(co->co_name);
Py_XDECREF(co->co_linetable);
Py_XDECREF(co->co_exceptiontable);
if (co->co_cell2arg != NULL)
PyMem_Free(co->co_cell2arg);
if (co->co_weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject*)co);
if (co->co_quickened) {
@ -1377,10 +1343,6 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
}
if (co->co_cell2arg != NULL && co->co_cellvars != NULL) {
res += co->co_ncellvars * sizeof(Py_ssize_t);
}
if (co->co_quickened != NULL) {
Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/

View file

@ -918,7 +918,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
return f;
}
int
static int
_PyFrame_OpAlreadyRan(PyFrameObject *f, int opcode, int oparg)
{
const _Py_CODEUNIT *code =
@ -966,26 +966,9 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
continue;
}
/* Some args are also cells. For now each of those variables
has two indices in the fast array, with both marked as cells
but only one marked as an arg. That one is always set
to NULL in _PyEval_MakeFrameVector() and the other index
gets the cell holding the arg value. So we ignore the
former here and will later use the cell for the variable.
*/
if (kind & CO_FAST_LOCAL && kind & CO_FAST_CELL) {
continue;
}
PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
PyObject *value = fast[i];
if (f->f_state != FRAME_CLEARED) {
int cellargoffset = CO_CELL_NOT_AN_ARG;
if (kind & CO_FAST_CELL && co->co_cell2arg != NULL) {
assert(i - co->co_nlocals >= 0);
assert(i - co->co_nlocals < co->co_ncellvars);
cellargoffset = co->co_cell2arg[i - co->co_nlocals];
}
if (kind & CO_FAST_FREE) {
// The cell was set by _PyEval_MakeFrameVector() from
// the function's closure.
@ -1003,20 +986,10 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
// (likely) MAKE_CELL must have executed already.
value = PyCell_GET(value);
}
// (unlikely) Otherwise it must be an initial value set
// by an earlier call to PyFrame_FastToLocals().
}
else {
// (unlikely) MAKE_CELL hasn't executed yet.
if (cellargoffset != CO_CELL_NOT_AN_ARG) {
// It is an arg that escapes into an inner
// function so we use the initial value that
// was already set by _PyEval_MakeFrameVector().
// Normally the arg value would always be set.
// However, it can be NULL if it was deleted via
// PyFrame_LocalsToFast().
value = fast[cellargoffset];
}
// (likely) Otherwise it is an arg (kind & CO_FAST_LOCAL),
// with the initial value set by _PyEval_MakeFrameVector()...
// (unlikely) ...or it was set to some initial value by
// an earlier call to PyFrame_LocalsToFast().
}
}
}
@ -1079,10 +1052,6 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
if (kind & CO_FAST_FREE && !(co->co_flags & CO_OPTIMIZED)) {
continue;
}
/* Same test as in PyFrame_FastToLocals() above. */
if (kind & CO_FAST_LOCAL && kind & CO_FAST_CELL) {
continue;
}
PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
PyObject *value = PyObject_GetItem(locals, name);
/* We only care about NULLs if clear is true. */
@ -1093,12 +1062,6 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
}
}
PyObject *oldvalue = fast[i];
int cellargoffset = CO_CELL_NOT_AN_ARG;
if (kind & CO_FAST_CELL && co->co_cell2arg != NULL) {
assert(i - co->co_nlocals >= 0);
assert(i - co->co_nlocals < co->co_ncellvars);
cellargoffset = co->co_cell2arg[i - co->co_nlocals];
}
PyObject *cell = NULL;
if (kind == CO_FAST_FREE) {
// The cell was set by _PyEval_MakeFrameVector() from
@ -1107,21 +1070,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
cell = oldvalue;
}
else if (kind & CO_FAST_CELL && oldvalue != NULL) {
if (cellargoffset != CO_CELL_NOT_AN_ARG) {
/* Same test as in PyFrame_FastToLocals() above. */
if (PyCell_Check(oldvalue) &&
_PyFrame_OpAlreadyRan(f, MAKE_CELL, i)) {
// (likely) MAKE_CELL must have executed already.
// It's the cell for an arg.
assert(PyCell_Check(oldvalue));
cell = oldvalue;
}
else {
if (PyCell_Check(oldvalue) &&
_PyFrame_OpAlreadyRan(f, MAKE_CELL, i)) {
// (likely) MAKE_CELL must have executed already.
cell = oldvalue;
}
// (unlikely) Otherwise, it must have been set to some
// initial value by an earlier call to PyFrame_LocalsToFast().
}
// (unlikely) Otherwise, it must have been set to some
// initial value by an earlier call to PyFrame_LocalsToFast().
}
if (cell != NULL) {
oldvalue = PyCell_GET(cell);
@ -1131,30 +1087,9 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
PyCell_SET(cell, value);
}
}
else {
int offset = i;
if (kind & CO_FAST_CELL) {
// (unlikely) MAKE_CELL hasn't executed yet.
// Note that there is no need to create the cell that
// MAKE_CELL would otherwise create later, since no
// *_DEREF ops can happen before MAKE_CELL has run.
if (cellargoffset != CO_CELL_NOT_AN_ARG) {
// It's the cell for an arg.
// Replace the initial value that was set by
// _PyEval_MakeFrameVector().
// Normally the arg value would always be set.
// However, it can be NULL if it was deleted
// via an earlier PyFrame_LocalsToFast() call.
offset = cellargoffset;
oldvalue = fast[offset];
}
// Otherwise set an initial value for MAKE_CELL to use
// when it runs later.
}
if (value != oldvalue) {
Py_XINCREF(value);
Py_XSETREF(fast[offset], value);
}
else if (value != oldvalue) {
Py_XINCREF(value);
Py_XSETREF(fast[i], value);
}
Py_XDECREF(value);
}

View file

@ -11,7 +11,6 @@
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_unionobject.h" // _Py_Union(), _Py_union_type_or
#include "frameobject.h"
#include "pycore_frame.h" // _PyFrame_OpAlreadyRan
#include "opcode.h" // MAKE_CELL
#include "structmember.h" // PyMemberDef
@ -8878,23 +8877,18 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
return -1;
}
PyObject *obj = f->f_localsptr[0];
int i;
if (obj == NULL && co->co_cell2arg) {
/* The first argument might be a cell. */
for (i = 0; i < co->co_ncellvars; i++) {
if (co->co_cell2arg[i] == 0) {
int celloffset = co->co_nlocals + i;
PyObject *cell = f->f_localsptr[celloffset];
if (PyCell_Check(cell) &&
_PyFrame_OpAlreadyRan(f, MAKE_CELL, celloffset)) {
obj = PyCell_GET(cell);
}
break;
}
PyObject *firstarg = f->f_localsptr[0];
// The first argument might be a cell.
if (firstarg != NULL && (co->co_localspluskinds[0] & CO_FAST_CELL)) {
// "firstarg" is a cell here unless (very unlikely) super()
// was called from the C-API before the first MAKE_CELL op.
if (f->f_lasti >= 0) {
assert(_Py_OPCODE(*co->co_firstinstr) == MAKE_CELL);
assert(PyCell_Check(firstarg));
firstarg = PyCell_GET(firstarg);
}
}
if (obj == NULL) {
if (firstarg == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"super(): arg[0] deleted");
return -1;
@ -8902,9 +8896,9 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
// Look for __class__ in the free vars.
PyTypeObject *type = NULL;
i = co->co_nlocals + co->co_ncellvars;
int i = co->co_nlocals + co->co_nplaincellvars;
for (; i < co->co_nlocalsplus; i++) {
assert(co->co_localspluskinds[i] & CO_FAST_FREE);
assert((co->co_localspluskinds[i] & CO_FAST_FREE) != 0);
PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
assert(PyUnicode_Check(name));
if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) {
@ -8936,7 +8930,7 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
}
*type_p = type;
*obj_p = obj;
*obj_p = firstarg;
return 0;
}