gh-87859: Track Code Object Local Kinds For Arguments (gh-132980)

Doing this was always the intention. I was finally motivated to find the time to do it.

See #87859 (comment).
This commit is contained in:
Eric Snow 2025-04-28 20:21:47 -06:00 committed by GitHub
parent 96a7fb93a8
commit 219d8d24b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 372 additions and 194 deletions

View file

@ -177,12 +177,14 @@ typedef struct {
*/
// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_HIDDEN 0x10
#define CO_FAST_LOCAL 0x20
#define CO_FAST_CELL 0x40
#define CO_FAST_FREE 0x80
#define CO_FAST_ARG_POS (0x02) // pos-only, pos-or-kw, varargs
#define CO_FAST_ARG_KW (0x04) // kw-only, pos-or-kw, varkwargs
#define CO_FAST_ARG_VAR (0x08) // varargs, varkwargs
#define CO_FAST_ARG (CO_FAST_ARG_POS | CO_FAST_ARG_KW | CO_FAST_ARG_VAR)
#define CO_FAST_HIDDEN (0x10)
#define CO_FAST_LOCAL (0x20)
#define CO_FAST_CELL (0x40)
#define CO_FAST_FREE (0x80)
typedef unsigned char _PyLocals_Kind;

View file

@ -0,0 +1,163 @@
# Fixture: a function that is as empty as possible.  The notes inside list
# everything it intentionally lacks.  Plain comments are used rather than a
# docstring so that nothing is added to the function itself.
def spam_minimal():
# no arg defaults or kwarg defaults
# no annotations
# no local vars
# no free vars
# no globals
# no builtins
# no attr access (names)
# no code
return
def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
    # Fixture that uses every kind of parameter (positional-only,
    # positional-or-keyword, *args, keyword-only, **kwargs) together with
    # ordinary local variables.  Returns a pair: a tuple of all the argument
    # values, and a list of extra values.
    #
    # arg defaults, kwarg defaults
    # annotations
    # all kinds of local vars, except cells
    # no free vars
    # some globals
    # some builtins
    # some attr access (names)
    x = args
    y = kwargs
    z = (a, b, c, d)
    kwargs['e'] = e
    kwargs['f'] = f
    extras = list((x, y, z, spam, spam.__name__))
    # tuple() accepts a single iterable, so the values are wrapped in one
    # tuple; passing them as separate arguments raises TypeError.
    return tuple((a, b, c, d, e, f, args, kwargs)), extras


def spam(x):
    # Trivial one-parameter fixture; spam_full() also reads it as a global.
    return x, None
# Fixture: outer function with a plain nested function.  eggs_nested only
# uses its own parameter, so it does not capture anything from spam_N.
def spam_N(x):
def eggs_nested(y):
return None, y
return eggs_nested, x
# Fixture: outer function with a closure.  eggs_closure reads both `a` and
# `x` from spam_C's scope.
def spam_C(x):
a = 1
def eggs_closure(y):
return None, y, a, x
return eggs_closure, a, x
# Fixture: two levels of nesting with no captured variables; each inner
# function only uses its own parameter.
def spam_NN(x):
def eggs_nested_N(y):
def ham_nested(z):
return None, z
return ham_nested, y
return eggs_nested_N, x
# Fixture: two levels of nesting where only the innermost function reads
# outer variables.  ham_closure uses `y` from eggs_nested_C and `a`, `x`
# from spam_NC; eggs_nested_C's own statements only use `y`.
def spam_NC(x):
a = 1
def eggs_nested_C(y):
def ham_closure(z):
return None, z, y, a, x
return ham_closure, y
return eggs_nested_C, a, x
# Fixture: the middle function reads `a` and `x` from spam_CN, while the
# innermost function (ham_C_nested) only uses its own parameter.
def spam_CN(x):
a = 1
def eggs_closure_N(y):
def ham_C_nested(z):
return None, z
return ham_C_nested, y, a, x
return eggs_closure_N, a, x
# Fixture: both nested levels read outer variables.  eggs_closure_C uses
# `a` and `x` from spam_CC; ham_C_closure additionally uses `b` and `y`
# from eggs_closure_C.
def spam_CC(x):
a = 1
def eggs_closure_C(y):
b = 2
def ham_C_closure(z):
return None, z, b, y, a, x
return ham_C_closure, b, y, a, x
return eggs_closure_C, a, x
# Call each outer function once and keep only the first item it returns
# (the inner function object); the remaining items are discarded into `_`.
eggs_nested, *_ = spam_N(1)
eggs_closure, *_ = spam_C(1)
eggs_nested_N, *_ = spam_NN(1)
eggs_nested_C, *_ = spam_NC(1)
eggs_closure_N, *_ = spam_CN(1)
eggs_closure_C, *_ = spam_CC(1)
# Do the same one level deeper to get hold of the innermost functions.
ham_nested, *_ = eggs_nested_N(2)
ham_closure, *_ = eggs_nested_C(2)
ham_C_nested, *_ = eggs_closure_N(2)
ham_C_closure, *_ = eggs_closure_C(2)
# Functions defined directly at module level.
TOP_FUNCTIONS = [
# shallow
spam_minimal,
spam_full,
spam,
# outer func
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
]
# Function objects that were created inside another function.
NESTED_FUNCTIONS = [
# inner func
eggs_nested,
eggs_closure,
eggs_nested_N,
eggs_nested_C,
eggs_closure_N,
eggs_closure_C,
# inner inner func
ham_nested,
ham_closure,
ham_C_nested,
ham_C_closure,
]
# Every plain function fixture: top-level first, then nested.
FUNCTIONS = [
*TOP_FUNCTIONS,
*NESTED_FUNCTIONS,
]
# generators
# Generator fixture that yields each positional argument using an explicit
# loop.
def gen_spam_1(*args):
for arg in args:
yield arg
# Generator fixture that yields each positional argument by delegating
# with `yield from`.
def gen_spam_2(*args):
yield from args
# Coroutine-function fixture with an empty body.
async def async_spam():
pass
# A coroutine object created from the fixture.  It is closed right away,
# presumably to avoid a "never awaited" warning -- TODO confirm.
coro_spam = async_spam()
coro_spam.close()
# Async-generator fixture that yields each positional argument.
async def asyncgen_spam(*args):
for arg in args:
yield arg
# An async generator object created by calling the fixture.
asynccoro_spam = asyncgen_spam(1, 2, 3)
# The generator, coroutine and async-generator *functions* defined above.
FUNCTION_LIKE = [
gen_spam_1,
gen_spam_2,
async_spam,
asyncgen_spam,
]
# The objects obtained by *calling* the coroutine / async-generator functions.
FUNCTION_LIKE_APPLIED = [
coro_spam, # actually FunctionType?
asynccoro_spam, # actually FunctionType?
]

View file

@ -3,172 +3,9 @@
#######################################
# functions
# functions and generators
# Fixture: a function that is as empty as possible.  The notes inside list
# everything it intentionally lacks.
def spam_minimal():
# no arg defaults or kwarg defaults
# no annotations
# no local vars
# no free vars
# no globals
# no builtins
# no attr access (names)
# no code
return
def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
    # Fixture that uses every kind of parameter (positional-only,
    # positional-or-keyword, *args, keyword-only, **kwargs) together with
    # ordinary local variables.  Returns a pair: a tuple of all the argument
    # values, and a list of extra values.
    #
    # arg defaults, kwarg defaults
    # annotations
    # all kinds of local vars, except cells
    # no free vars
    # some globals
    # some builtins
    # some attr access (names)
    x = args
    y = kwargs
    z = (a, b, c, d)
    kwargs['e'] = e
    kwargs['f'] = f
    extras = list((x, y, z, spam, spam.__name__))
    # tuple() accepts a single iterable, so the values are wrapped in one
    # tuple; passing them as separate arguments raises TypeError.
    return tuple((a, b, c, d, e, f, args, kwargs)), extras


def spam(x):
    # Trivial one-parameter fixture; spam_full() also reads it as a global.
    return x, None
# Fixture: outer function with a plain nested function.  eggs_nested only
# uses its own parameter, so it does not capture anything from spam_N.
def spam_N(x):
def eggs_nested(y):
return None, y
return eggs_nested, x
# Fixture: outer function with a closure.  eggs_closure reads both `a` and
# `x` from spam_C's scope.
def spam_C(x):
a = 1
def eggs_closure(y):
return None, y, a, x
return eggs_closure, a, x
# Fixture: two levels of nesting with no captured variables; each inner
# function only uses its own parameter.
def spam_NN(x):
def eggs_nested_N(y):
def ham_nested(z):
return None, z
return ham_nested, y
return eggs_nested_N, x
# Fixture: two levels of nesting where only the innermost function reads
# outer variables.  ham_closure uses `y` from eggs_nested_C and `a`, `x`
# from spam_NC; eggs_nested_C's own statements only use `y`.
def spam_NC(x):
a = 1
def eggs_nested_C(y):
def ham_closure(z):
return None, z, y, a, x
return ham_closure, y
return eggs_nested_C, a, x
# Fixture: the middle function reads `a` and `x` from spam_CN, while the
# innermost function (ham_C_nested) only uses its own parameter.
def spam_CN(x):
a = 1
def eggs_closure_N(y):
def ham_C_nested(z):
return None, z
return ham_C_nested, y, a, x
return eggs_closure_N, a, x
def spam_CC(x):
    # Fixture: both nested levels read outer variables.  eggs_closure_C
    # uses `a` and `x` from this function; ham_C_closure additionally uses
    # `b` and `y` from eggs_closure_C.
    a = 1
    def eggs_closure_C(y):
        b = 2
        def ham_C_closure(z):
            return None, z, b, y, a, x
        return ham_C_closure, b, y, a, x
    # Return the function defined here.  The name eggs_closure_N is not
    # defined in this function, so returning it would hand back whatever a
    # global of that name holds, or raise NameError if there is none.
    return eggs_closure_C, a, x
# Call each outer function once and keep only the first item it returns;
# the remaining items are discarded into `_`.
eggs_nested, *_ = spam_N(1)
eggs_closure, *_ = spam_C(1)
eggs_nested_N, *_ = spam_NN(1)
eggs_nested_C, *_ = spam_NC(1)
eggs_closure_N, *_ = spam_CN(1)
eggs_closure_C, *_ = spam_CC(1)
# Do the same one level deeper, calling the objects obtained above.
ham_nested, *_ = eggs_nested_N(2)
ham_closure, *_ = eggs_nested_C(2)
ham_C_nested, *_ = eggs_closure_N(2)
ham_C_closure, *_ = eggs_closure_C(2)
# Functions defined directly at module level.
TOP_FUNCTIONS = [
# shallow
spam_minimal,
spam_full,
spam,
# outer func
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
]
# Objects obtained by calling the outer functions above.
NESTED_FUNCTIONS = [
# inner func
eggs_nested,
eggs_closure,
eggs_nested_N,
eggs_nested_C,
eggs_closure_N,
eggs_closure_C,
# inner inner func
ham_nested,
ham_closure,
ham_C_nested,
ham_C_closure,
]
# Every plain function fixture: top-level first, then nested.
FUNCTIONS = [
*TOP_FUNCTIONS,
*NESTED_FUNCTIONS,
]
#######################################
# function-like
# generators
# Generator fixture that yields each positional argument using an explicit
# loop.
def gen_spam_1(*args):
for arg in args:
yield arg
# Generator fixture that yields each positional argument by delegating
# with `yield from`.
def gen_spam_2(*args):
yield from args
# Coroutine-function fixture with an empty body.
async def async_spam():
pass
# A coroutine object created from the fixture.  It is closed right away,
# presumably to avoid a "never awaited" warning -- TODO confirm.
coro_spam = async_spam()
coro_spam.close()
# Async-generator fixture that yields each positional argument.
async def asyncgen_spam(*args):
for arg in args:
yield arg
# An async generator object created by calling the fixture.
asynccoro_spam = asyncgen_spam(1, 2, 3)
# The generator, coroutine and async-generator *functions* defined above.
FUNCTION_LIKE = [
gen_spam_1,
gen_spam_2,
async_spam,
asyncgen_spam,
]
# The objects obtained by *calling* the coroutine / async-generator functions.
FUNCTION_LIKE_APPLIED = [
coro_spam, # actually FunctionType?
asynccoro_spam, # actually FunctionType?
]
from test._code_definitions import *
#######################################

View file

@ -246,6 +246,7 @@ def dump(co):
def external_getitem(self, i):
return f"Foreign getitem: {super().__getitem__(i)}"
class CodeTest(unittest.TestCase):
@cpython_only
@ -654,6 +655,128 @@ class CodeTest(unittest.TestCase):
self.assertNotEqual(code1, code2)
sys.settrace(None)
@unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi")
def test_local_kinds(self):
    # Check the per-variable "kind" flags recorded on code objects.
    #
    # For every function in test._code_definitions.FUNCTIONS, the mapping
    # returned by _testinternalcapi.get_co_localskinds() for its code
    # object is compared with a hand-written {name: flags} table.

    # Local copies of the flag values, spelled out here because the test
    # compares raw integers.
    CO_FAST_ARG_POS = 0x02
    CO_FAST_ARG_KW = 0x04
    CO_FAST_ARG_VAR = 0x08
    CO_FAST_HIDDEN = 0x10
    CO_FAST_LOCAL = 0x20
    CO_FAST_CELL = 0x40
    CO_FAST_FREE = 0x80

    # Flag combinations expected for each kind of parameter.
    POSONLY = CO_FAST_LOCAL | CO_FAST_ARG_POS
    POSORKW = CO_FAST_LOCAL | CO_FAST_ARG_POS | CO_FAST_ARG_KW
    KWONLY = CO_FAST_LOCAL | CO_FAST_ARG_KW
    VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS
    VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW

    import test._code_definitions as defs
    funcs = {
        defs.spam_minimal: {},
        defs.spam_full: {
            'a': POSONLY,
            'b': POSONLY,
            'c': POSORKW,
            'd': POSORKW,
            'e': KWONLY,
            'f': KWONLY,
            'args': VARARGS,
            'kwargs': VARKWARGS,
            'x': CO_FAST_LOCAL,
            'y': CO_FAST_LOCAL,
            'z': CO_FAST_LOCAL,
            'extras': CO_FAST_LOCAL,
        },
        defs.spam: {
            'x': POSORKW,
        },
        defs.spam_N: {
            'x': POSORKW,
            'eggs_nested': CO_FAST_LOCAL,
        },
        defs.spam_C: {
            'x': POSORKW | CO_FAST_CELL,
            'a': CO_FAST_CELL,
            'eggs_closure': CO_FAST_LOCAL,
        },
        defs.spam_NN: {
            'x': POSORKW,
            'eggs_nested_N': CO_FAST_LOCAL,
        },
        defs.spam_NC: {
            'x': POSORKW | CO_FAST_CELL,
            'a': CO_FAST_CELL,
            'eggs_nested_C': CO_FAST_LOCAL,
        },
        defs.spam_CN: {
            'x': POSORKW | CO_FAST_CELL,
            'a': CO_FAST_CELL,
            'eggs_closure_N': CO_FAST_LOCAL,
        },
        defs.spam_CC: {
            'x': POSORKW | CO_FAST_CELL,
            'a': CO_FAST_CELL,
            'eggs_closure_C': CO_FAST_LOCAL,
        },
        defs.eggs_nested: {
            'y': POSORKW,
        },
        defs.eggs_closure: {
            'y': POSORKW,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
        },
        defs.eggs_nested_N: {
            'y': POSORKW,
            'ham_nested': CO_FAST_LOCAL,
        },
        defs.eggs_nested_C: {
            'y': POSORKW | CO_FAST_CELL,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
            'ham_closure': CO_FAST_LOCAL,
        },
        defs.eggs_closure_N: {
            'y': POSORKW,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
            'ham_C_nested': CO_FAST_LOCAL,
        },
        defs.eggs_closure_C: {
            'y': POSORKW | CO_FAST_CELL,
            'b': CO_FAST_CELL,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
            'ham_C_closure': CO_FAST_LOCAL,
        },
        defs.ham_nested: {
            'z': POSORKW,
        },
        defs.ham_closure: {
            'z': POSORKW,
            'y': CO_FAST_FREE,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
        },
        defs.ham_C_nested: {
            'z': POSORKW,
        },
        defs.ham_C_closure: {
            'z': POSORKW,
            'y': CO_FAST_FREE,
            'b': CO_FAST_FREE,
            'x': CO_FAST_FREE,
            'a': CO_FAST_FREE,
        },
    }
    # Guard against a fixture being added without an expectation here.
    # A unittest assertion is used because a bare `assert` statement is
    # skipped entirely when Python runs with -O.
    self.assertEqual(len(funcs), len(defs.FUNCTIONS))
    for func in defs.FUNCTIONS:
        with self.subTest(func):
            expected = funcs[func]
            kinds = _testinternalcapi.get_co_localskinds(func.__code__)
            self.assertEqual(kinds, expected)
def isinterned(s):
    """Return True if *s* is the interned string object for its value."""
    # Build an equal string that is a distinct object, so that interning it
    # yields `s` itself only when `s` is already the interned instance.
    equal_copy = ('_' + s + '_')[1:-1]
    canonical = sys.intern(equal_copy)
    return s is canonical

View file

@ -968,6 +968,37 @@ get_co_framesize(PyObject *self, PyObject *arg)
return PyLong_FromLong(code->co_framesize);
}
/* Return a new dict mapping each name in a code object's co_localsplusnames
 * to the integer kind flags stored for it in co_localspluskinds.
 *
 * Raises TypeError if the argument is not a code object.  Returns NULL with
 * an exception set on failure.
 */
static PyObject *
get_co_localskinds(PyObject *self, PyObject *arg)
{
    if (!PyCode_Check(arg)) {
        PyErr_SetString(PyExc_TypeError, "argument must be a code object");
        return NULL;
    }
    PyCodeObject *code = (PyCodeObject *)arg;

    PyObject *result = PyDict_New();
    if (result == NULL) {
        return NULL;
    }

    for (int i = 0; i < code->co_nlocalsplus; i++) {
        // Borrowed reference: the names tuple belongs to the code object.
        PyObject *varname = PyTuple_GET_ITEM(code->co_localsplusnames, i);
        _PyLocals_Kind flags = _PyLocals_GetKind(code->co_localspluskinds, i);
        PyObject *value = PyLong_FromLong(flags);
        if (value == NULL) {
            goto error;
        }
        int rc = PyDict_SetItem(result, varname, value);
        // PyDict_SetItem() does not steal the value reference, so it is
        // dropped here whether or not the insertion succeeded.
        Py_DECREF(value);
        if (rc < 0) {
            goto error;
        }
    }
    return result;

error:
    Py_DECREF(result);
    return NULL;
}
static PyObject *
jit_enabled(PyObject *self, PyObject *arg)
{
@ -2088,6 +2119,7 @@ static PyMethodDef module_functions[] = {
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
{"code_returns_only_none", code_returns_only_none, METH_O, NULL},
{"get_co_framesize", get_co_framesize, METH_O, NULL},
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"jit_enabled", jit_enabled, METH_NOARGS, NULL},
#ifdef _Py_TIER2
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},

View file

@ -482,33 +482,52 @@ extern void _Py_set_localsplus_info(int, PyObject *, unsigned char,
static int
compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus,
PyObject *names, PyObject *kinds)
int flags, PyObject *names, PyObject *kinds)
{
PyObject *k, *v;
Py_ssize_t pos = 0;
while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) {
int offset = PyLong_AsInt(v);
if (offset == -1 && PyErr_Occurred()) {
return ERROR;
}
assert(offset >= 0);
assert(offset < nlocalsplus);
// For now we do not distinguish arg kinds.
_PyLocals_Kind kind = CO_FAST_LOCAL;
int has_key = PyDict_Contains(umd->u_fasthidden, k);
RETURN_IF_ERROR(has_key);
if (has_key) {
kind |= CO_FAST_HIDDEN;
}
// Set the locals kinds. Arg vars fill the first portion of the list.
struct {
int count;
_PyLocals_Kind kind;
} argvarkinds[6] = {
{(int)umd->u_posonlyargcount, CO_FAST_ARG_POS},
{(int)umd->u_argcount, CO_FAST_ARG_POS | CO_FAST_ARG_KW},
{(int)umd->u_kwonlyargcount, CO_FAST_ARG_KW},
{!!(flags & CO_VARARGS), CO_FAST_ARG_VAR | CO_FAST_ARG_POS},
{!!(flags & CO_VARKEYWORDS), CO_FAST_ARG_VAR | CO_FAST_ARG_KW},
{-1, 0}, // the remaining local vars
};
int max = 0;
for (int i = 0; i < 6; i++) {
max = argvarkinds[i].count < 0
? INT_MAX
: max + argvarkinds[i].count;
while (pos < max && PyDict_Next(umd->u_varnames, &pos, &k, &v)) {
int offset = PyLong_AsInt(v);
if (offset == -1 && PyErr_Occurred()) {
return ERROR;
}
assert(offset >= 0);
assert(offset < nlocalsplus);
has_key = PyDict_Contains(umd->u_cellvars, k);
RETURN_IF_ERROR(has_key);
if (has_key) {
kind |= CO_FAST_CELL;
}
_PyLocals_Kind kind = CO_FAST_LOCAL | argvarkinds[i].kind;
_Py_set_localsplus_info(offset, k, kind, names, kinds);
int has_key = PyDict_Contains(umd->u_fasthidden, k);
RETURN_IF_ERROR(has_key);
if (has_key) {
kind |= CO_FAST_HIDDEN;
}
has_key = PyDict_Contains(umd->u_cellvars, k);
RETURN_IF_ERROR(has_key);
if (has_key) {
kind |= CO_FAST_CELL;
}
_Py_set_localsplus_info(offset, k, kind, names, kinds);
}
}
int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
@ -594,8 +613,10 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
if (localspluskinds == NULL) {
goto error;
}
if (compute_localsplus_info(umd, nlocalsplus,
localsplusnames, localspluskinds) == ERROR) {
if (compute_localsplus_info(
umd, nlocalsplus, code_flags,
localsplusnames, localspluskinds) == ERROR)
{
goto error;
}