gh-132775: Add _PyCode_VerifyStateless() (gh-133221)

"Stateless" code is a function or code object which does not rely on external state or internal state.
It may rely on arguments and builtins, but not globals or a closure. I've left a comment in
pycore_code.h that provides more detail.

We also add _PyFunction_VerifyStateless(). The new functions will be used in several later changes
that facilitate "sharing" functions and code objects between interpreters.
This commit is contained in:
Eric Snow 2025-05-05 15:48:58 -06:00 committed by GitHub
parent f610bbdf74
commit d270bb5792
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 442 additions and 38 deletions

View file

@ -621,6 +621,47 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts(
PyObject *globalsns,
PyObject *builtinsns);
/* "Stateless" code is a function or code object which does not rely on
* external state or internal state. It may rely on arguments and
* builtins, but not globals or a closure. Thus it does not rely
* on __globals__ or __closure__, and a stateless function
* is equivalent to its code object.
*
* Stateless code also does not keep any persistent state
* of its own, so it can't have any executors, monitoring,
* instrumentation, or "extras" (i.e. co_extra).
*
* Stateless code may create nested functions, including closures.
* However, nested functions must themselves be stateless, except they
* *can* close on the enclosing locals.
*
* Stateless code may return any value, including nested functions and closures.
*
* Stateless code that takes no arguments and doesn't return anything
* may be treated like a script.
*
* We consider stateless code to be "portable" if it does not return
* any object that holds a reference to any of the code's locals. Thus
* generators and coroutines are not portable. Likewise a function
* that returns a closure is not portable. The concept of
* portability is useful in cases where the code is run
* in a different execution context than where
* the return value will be used. */
/* Return 1 if the code object carries no internal state (executors,
 * instrumentation, monitoring data, or co_extra).  Otherwise return 0
 * and, when the last argument is not NULL, store a static explanatory
 * message through it.  No exception is set. */
PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **);
/* Return 1 if the given variable counts show no free (closure) variables
 * and no globals.  Otherwise return 0 and, when the last argument is not
 * NULL, store a static explanatory message through it.
 * No exception is set. */
PyAPI_FUNC(int) _PyCode_CheckNoExternalState(
PyCodeObject *,
_PyCode_var_counts_t *,
const char **);
/* Return 0 if the code object is stateless.  Otherwise return -1 with
 * an exception set (ValueError when one of the two checks above fails).
 * globalnames, globalsns and builtinsns may each be NULL. */
PyAPI_FUNC(int) _PyCode_VerifyStateless(
PyThreadState *,
PyCodeObject *,
PyObject *globalnames,
PyObject *globalsns,
PyObject *builtinsns);
/* Return 0 if the code object is flagged as a generator, coroutine,
 * iterable coroutine or async generator (storing a static message through
 * the last argument when it is not NULL); otherwise return 1. */
PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **);
/* NOTE(review): presumably reports whether the code can only ever return
 * None; the implementation is not visible here -- confirm. */
PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *);

View file

@ -35,6 +35,13 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod
extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);
/* Return 0 if the given function object is stateless; otherwise return -1
 * with an exception set.  The second argument must be a function object.
 * See pycore_code.h for explanation about what "stateless" means. */
PyAPI_FUNC(int)
_PyFunction_VerifyStateless(PyThreadState *, PyObject *);
#ifdef __cplusplus
}
#endif

View file

@ -56,6 +56,8 @@ extern "C" {
/* True if the opcode is the return instruction.
 * The argument is parenthesized so that an expression such as
 * (word & 0xff) can be passed safely. */
#define IS_RETURN_OPCODE(opcode) \
    ((opcode) == RETURN_VALUE)
/* True if the opcode raises (RAISE_VARARGS or RERAISE).
 * Note that the argument is evaluated twice, so it must not have
 * side effects. */
#define IS_RAISE_OPCODE(opcode) \
    ((opcode) == RAISE_VARARGS || (opcode) == RERAISE)
/* Flags used in the oparg for MAKE_FUNCTION */

View file

@ -12,6 +12,40 @@ def spam_minimal():
return
# Fixture: uses only its own locals and builtins (callable, tuple, list,
# print); it references no module globals and has no closure.
def spam_with_builtins():
x = 42
values = (42,)
checks = tuple(callable(v) for v in values)
res = callable(values), tuple(values), list(values), checks
print(res)
# Fixture: like spam_with_builtins, but it also reads the module-level
# names spam and spam_minimal.  It is listed in STATELESS_CODE but not
# in STATELESS_FUNCTIONS.
def spam_with_globals_and_builtins():
func1 = spam
func2 = spam_minimal
funcs = (func1, func2)
checks = tuple(callable(f) for f in funcs)
res = callable(funcs), tuple(funcs), list(funcs), checks
print(res)
# Fixture: returns its single argument unchanged.
def spam_returns_arg(x):
return x
# Fixture: defines and calls a nested function that captures nothing
# from the enclosing scope.
def spam_with_inner_not_closure():
def eggs():
pass
eggs()
# Fixture: the nested function eggs() closes over the enclosing local x,
# so the outer function itself has no free variables while eggs() does.
def spam_with_inner_closure():
x = 42
def eggs():
print(x)
eggs()
def spam_full(a, b, /, c, d:int=1, *args, e, f:object=None, **kwargs) -> tuple:
# arg defaults, kwarg defaults
# annotations
@ -98,6 +132,11 @@ ham_C_closure, *_ = eggs_closure_C(2)
TOP_FUNCTIONS = [
# shallow
spam_minimal,
spam_with_builtins,
spam_with_globals_and_builtins,
spam_returns_arg,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_full,
spam,
# outer func
@ -127,6 +166,30 @@ FUNCTIONS = [
*NESTED_FUNCTIONS,
]
# Functions for which the stateless check is expected to succeed when it
# is given the function object itself.
STATELESS_FUNCTIONS = [
spam,
spam_minimal,
spam_with_builtins,
spam_returns_arg,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
eggs_nested,
eggs_nested_N,
ham_nested,
ham_C_nested
]
# Functions whose code object (func.__code__) is expected to pass the
# stateless check: everything in STATELESS_FUNCTIONS plus two functions
# that only qualify at the code-object level.
STATELESS_CODE = [
*STATELESS_FUNCTIONS,
spam_with_globals_and_builtins,
spam_full,
]
# generators

View file

@ -220,6 +220,7 @@ try:
import _testinternalcapi
except ModuleNotFoundError:
_testinternalcapi = None
import test._code_definitions as defs
COPY_FREE_VARS = opmap['COPY_FREE_VARS']
@ -671,9 +672,31 @@ class CodeTest(unittest.TestCase):
VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
import test._code_definitions as defs
funcs = {
defs.spam_minimal: {},
defs.spam_with_builtins: {
'x': CO_FAST_LOCAL,
'values': CO_FAST_LOCAL,
'checks': CO_FAST_LOCAL,
'res': CO_FAST_LOCAL,
},
defs.spam_with_globals_and_builtins: {
'func1': CO_FAST_LOCAL,
'func2': CO_FAST_LOCAL,
'funcs': CO_FAST_LOCAL,
'checks': CO_FAST_LOCAL,
'res': CO_FAST_LOCAL,
},
defs.spam_returns_arg: {
'x': POSORKW,
},
defs.spam_with_inner_not_closure: {
'eggs': CO_FAST_LOCAL,
},
defs.spam_with_inner_closure: {
'x': CO_FAST_CELL,
'eggs': CO_FAST_LOCAL,
},
defs.spam_full: {
'a': POSONLY,
'b': POSONLY,
@ -859,9 +882,26 @@ class CodeTest(unittest.TestCase):
},
}
import test._code_definitions as defs
funcs = {
defs.spam_minimal: new_var_counts(),
defs.spam_with_builtins: new_var_counts(
purelocals=4,
globalvars=4,
),
defs.spam_with_globals_and_builtins: new_var_counts(
purelocals=5,
globalvars=6,
),
defs.spam_returns_arg: new_var_counts(
posorkw=1,
),
defs.spam_with_inner_not_closure: new_var_counts(
purelocals=1,
),
defs.spam_with_inner_closure: new_var_counts(
othercells=1,
purelocals=1,
),
defs.spam_full: new_var_counts(
posonly=2,
posorkw=2,
@ -958,42 +998,35 @@ class CodeTest(unittest.TestCase):
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)
def func_with_globals_and_builtins():
mod1 = _testinternalcapi
mod2 = dis
mods = (mod1, mod2)
checks = tuple(callable(m) for m in mods)
return callable(mod2), tuple(mods), list(mods), checks
func = func_with_globals_and_builtins
func = defs.spam_with_globals_and_builtins
with self.subTest(f'{func} code'):
expected = new_var_counts(
purelocals=4,
globalvars=5,
purelocals=5,
globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected)
with self.subTest(f'{func} with own globals and builtins'):
expected = new_var_counts(
purelocals=4,
globalvars=(2, 3),
purelocals=5,
globalvars=(2, 4),
)
counts = _testinternalcapi.get_code_var_counts(func)
self.assertEqual(counts, expected)
with self.subTest(f'{func} without globals'):
expected = new_var_counts(
purelocals=4,
globalvars=(0, 3, 2),
purelocals=5,
globalvars=(0, 4, 2),
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={})
self.assertEqual(counts, expected)
with self.subTest(f'{func} without both'):
expected = new_var_counts(
purelocals=4,
globalvars=5,
purelocals=5,
globalvars=6,
)
counts = _testinternalcapi.get_code_var_counts(func, globalsns={},
builtinsns={})
@ -1001,12 +1034,34 @@ class CodeTest(unittest.TestCase):
with self.subTest(f'{func} without builtins'):
expected = new_var_counts(
purelocals=4,
globalvars=(2, 0, 3),
purelocals=5,
globalvars=(2, 0, 4),
)
counts = _testinternalcapi.get_code_var_counts(func, builtinsns={})
self.assertEqual(counts, expected)
@unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi")
def test_stateless(self):
# Run _testinternalcapi.verify_stateless_code() over the fixtures in
# test._code_definitions, both as code objects and as functions.
self.maxDiff = None
# Every entry here must be accepted when passed as a code object.
for func in defs.STATELESS_CODE:
with self.subTest((func, '(code)')):
_testinternalcapi.verify_stateless_code(func.__code__)
# Every entry here must be accepted when passed as a function.
for func in defs.STATELESS_FUNCTIONS:
with self.subTest((func, '(func)')):
_testinternalcapi.verify_stateless_code(func)
# Anything not listed in the corresponding list must be rejected.
for func in defs.FUNCTIONS:
if func not in defs.STATELESS_CODE:
with self.subTest((func, '(code)')):
with self.assertRaises(Exception):
_testinternalcapi.verify_stateless_code(func.__code__)
if func not in defs.STATELESS_FUNCTIONS:
with self.subTest((func, '(func)')):
with self.assertRaises(Exception):
_testinternalcapi.verify_stateless_code(func)
# Report whether s is interned: build an equal string through
# concatenation and slicing, intern that copy, and check whether the
# result is the very same object as s.
def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])

View file

@ -1165,6 +1165,48 @@ error:
return NULL;
}
/* verify_stateless_code(code, globalnames=<unset>, globalsns=<unset>,
 *                       builtinsns=<unset>)
 *
 * Test helper around _PyCode_VerifyStateless().  "code" may be a code
 * object or a function; for a function, its globals and builtins are used
 * for any namespace that was not passed explicitly.  globalnames must be
 * a set and the two namespaces must be dicts.
 *
 * Returns None on success; otherwise returns NULL with an exception set
 * (TypeError if "code" is neither a code object nor a function). */
static PyObject *
verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyThreadState *tstate = _PyThreadState_GET();
    PyObject *codearg;
    PyObject *globalnames = NULL;
    PyObject *globalsns = NULL;
    PyObject *builtinsns = NULL;
    static char *kwlist[] = {"code", "globalnames",
                             "globalsns", "builtinsns", NULL};
    // The name after ':' is used in argument-parsing error messages,
    // so it must be this function's name.
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                    "O|O!O!O!:verify_stateless_code", kwlist,
                    &codearg, &PySet_Type, &globalnames,
                    &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns))
    {
        return NULL;
    }
    if (PyFunction_Check(codearg)) {
        // Fall back to the function's own namespaces.
        if (globalsns == NULL) {
            globalsns = PyFunction_GET_GLOBALS(codearg);
        }
        if (builtinsns == NULL) {
            builtinsns = PyFunction_GET_BUILTINS(codearg);
        }
        codearg = PyFunction_GET_CODE(codearg);
    }
    else if (!PyCode_Check(codearg)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be a code object or a function");
        return NULL;
    }
    PyCodeObject *code = (PyCodeObject *)codearg;

    if (_PyCode_VerifyStateless(
                tstate, code, globalnames, globalsns, builtinsns) < 0)
    {
        return NULL;
    }
    Py_RETURN_NONE;
}
static PyObject *
jit_enabled(PyObject *self, PyObject *arg)
{
@ -2293,6 +2335,8 @@ static PyMethodDef module_functions[] = {
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
METH_VARARGS | METH_KEYWORDS, NULL},
{"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code),
METH_VARARGS | METH_KEYWORDS, NULL},
{"jit_enabled", jit_enabled, METH_NOARGS, NULL},
#ifdef _Py_TIER2
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},

View file

@ -1955,12 +1955,134 @@ finally:
}
/* Check that the code object keeps no persistent state of its own:
 * no executors, no instrumentation, no monitoring data, no co_extra.
 *
 * Returns 1 if the code object is clean.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static message in *p_errmsg.
 * No exception is set. */
int
_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg)
{
    int has_state = _PyCode_HAS_EXECUTORS(co)
                    || _PyCode_HAS_INSTRUMENTATION(co)
                    || co->_co_monitoring != NULL
                    || co->co_extra != NULL;
    if (!has_state) {
        return 1;
    }
    // Every kind of internal state is reported with the same message.
    if (p_errmsg != NULL) {
        *p_errmsg = "only basic code objects are supported";
    }
    return 0;
}
/* Check, based on already-computed variable counts, that the code does
 * not depend on a closure or on globals.
 *
 * Returns 1 if no external state is used.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static message in *p_errmsg.
 * No exception is set. */
int
_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts,
                             const char **p_errmsg)
{
    assert(counts->locals.hidden.total == 0);

    const char *reason;
    if (counts->numfree > 0) {
        // Free variables mean it's a closure.
        reason = "closures not supported";
    }
    else if (counts->unbound.globals.numglobal > 0
             || (counts->unbound.globals.numbuiltin > 0
                 && counts->unbound.globals.numunknown > 0))
    {
        // Unknown names only count as globals once builtins have been
        // identified.  Otherwise we don't check numunknown, since we
        // can't distinguish between globals and builtins here.
        reason = "globals not supported";
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Verify that a code object is "stateless": it holds no internal state
 * and relies on neither a closure nor globals.
 *
 * globalnames, globalsns and builtinsns are passed on to
 * _PyCode_SetUnboundVarCounts() to classify the unbound names.
 *
 * Returns 0 on success.  Returns -1 with an exception set on failure:
 * whatever _PyCode_SetUnboundVarCounts() raised, or ValueError when one
 * of the state checks fails. */
int
_PyCode_VerifyStateless(PyThreadState *tstate,
                        PyCodeObject *co, PyObject *globalnames,
                        PyObject *globalsns, PyObject *builtinsns)
{
    const char *reason = NULL;
    _PyCode_var_counts_t varcounts = {0};

    _PyCode_GetVarCounts(co, &varcounts);
    int rc = _PyCode_SetUnboundVarCounts(
                    tstate, co, &varcounts, globalnames, NULL,
                    globalsns, builtinsns);
    if (rc < 0) {
        return -1;
    }

    // We may consider relaxing the internal state constraints
    // if it becomes a problem.
    if (!_PyCode_CheckNoInternalState(co, &reason)) {
        goto invalid;
    }

    if (builtinsns != NULL) {
        // Make sure the next check will fail for globals,
        // even if there aren't any builtins.
        varcounts.unbound.globals.numbuiltin += 1;
    }
    if (!_PyCode_CheckNoExternalState(co, &varcounts, &reason)) {
        goto invalid;
    }

    // Note that we don't check co->co_flags & CO_NESTED for anything here.
    return 0;

invalid:
    _PyErr_SetString(tstate, PyExc_ValueError, reason);
    return -1;
}
/* Check that the code object is a plain function, i.e. not flagged as a
 * generator, coroutine, iterable coroutine, or async generator.
 *
 * Returns 1 for a plain function.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static message in *p_errmsg.
 * No exception is set. */
int
_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg)
{
    static const char generators_msg[] = "generators not supported";
    static const char coroutines_msg[] = "coroutines not supported";

    const char *reason;
    // The order of the tests decides which message wins when several
    // flags are set at once.
    if (co->co_flags & CO_GENERATOR) {
        reason = generators_msg;
    }
    else if (co->co_flags & (CO_COROUTINE | CO_ITERABLE_COROUTINE)) {
        reason = coroutines_msg;
    }
    else if (co->co_flags & CO_ASYNC_GENERATOR) {
        reason = generators_msg;
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Here "value" means a non-None value, since a bare return is identical
* to returning None explicitly. Likewise a missing return statement
* at the end of the function is turned into "return None". */
static int
code_returns_only_none(PyCodeObject *co)
{
if (!_PyCode_CheckPureFunction(co, NULL)) {
return 0;
}
int len = (int)Py_SIZE(co);
assert(len > 0);
// The last instruction either returns or raises. We can take advantage
// of that for a quick exit.
_Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1);
// Look up None in co_consts.
Py_ssize_t nconsts = PyTuple_Size(co->co_consts);
int none_index = 0;
@ -1971,27 +2093,43 @@ code_returns_only_none(PyCodeObject *co)
}
if (none_index == nconsts) {
// None wasn't there, which means there was no implicit return,
// "return", or "return None". That means there must be
// an explicit return (non-None).
return 0;
}
// "return", or "return None".
// Walk the bytecode, looking for RETURN_VALUE.
Py_ssize_t len = Py_SIZE(co);
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
assert(i != 0);
// Ignore it if it returns None.
_Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
if (prev.op.code == LOAD_CONST) {
// We don't worry about EXTENDED_ARG for now.
if (prev.op.arg == none_index) {
continue;
}
}
// That means there must be
// an explicit return (non-None), or it only raises.
if (IS_RETURN_OPCODE(final.op.code)) {
// It was an explicit return (non-None).
return 0;
}
// It must end with a raise then. We still have to walk the
// bytecode to see if there's any explicit return (non-None).
assert(IS_RAISE_OPCODE(final.op.code));
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
// We already know it isn't returning None.
return 0;
}
}
// It must only raise.
}
else {
// Walk the bytecode, looking for RETURN_VALUE.
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
assert(i != 0);
// Ignore it if it returns None.
_Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
if (prev.op.code == LOAD_CONST) {
// We don't worry about EXTENDED_ARG for now.
if (prev.op.arg == none_index) {
continue;
}
}
return 0;
}
}
}
return 1;
}

View file

@ -1,12 +1,14 @@
/* Function object implementation */
#include "Python.h"
#include "pycore_code.h" // _PyCode_VerifyStateless()
#include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_function.h" // _PyFunction_Vectorcall
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
@ -1240,6 +1242,58 @@ PyTypeObject PyFunction_Type = {
};
/* Verify that a function object is "stateless" (see pycore_code.h).
 *
 * func must be a function object, and no exception may be pending.
 *
 * Returns 0 on success.  Returns -1 with an exception set when:
 *  - the function's globals or builtins is not a dict (TypeError);
 *  - the function has non-empty positional defaults, keyword defaults,
 *    or closure (ValueError);
 *  - _PyCode_VerifyStateless() rejects the function's code object. */
int
_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func)
{
    assert(!PyErr_Occurred());
    assert(PyFunction_Check(func));

    // Check the globals.
    PyObject *globalsns = PyFunction_GET_GLOBALS(func);
    if (globalsns != NULL && !PyDict_Check(globalsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported globals %R", globalsns);
        return -1;
    }
    // Check the builtins.
    PyObject *builtinsns = PyFunction_GET_BUILTINS(func);
    if (builtinsns != NULL && !PyDict_Check(builtinsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported builtins %R", builtinsns);
        return -1;
    }
    // Disallow __defaults__.
    PyObject *defaults = PyFunction_GET_DEFAULTS(func);
    if (defaults != NULL && defaults != Py_None) {
        // Positional defaults are stored in a tuple, so the size must be
        // read with the tuple API.  PyDict_Size() on a tuple returns -1
        // with SystemError set, which would let non-empty defaults pass
        // this check and leave a stray exception pending.
        assert(PyTuple_Check(defaults));
        if (PyTuple_GET_SIZE(defaults) > 0) {
            _PyErr_SetString(tstate, PyExc_ValueError,
                             "defaults not supported");
            return -1;
        }
    }
    // Disallow __kwdefaults__ (stored in a dict).
    PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func);
    if (kwdefaults != NULL && kwdefaults != Py_None
            && PyDict_Size(kwdefaults) > 0)
    {
        _PyErr_SetString(tstate, PyExc_ValueError,
                         "keyword defaults not supported");
        return -1;
    }
    // Disallow __closure__.
    PyObject *closure = PyFunction_GET_CLOSURE(func);
    if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0)
    {
        _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported");
        return -1;
    }
    // Check the code.
    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
    if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) {
        return -1;
    }
    return 0;
}
static int
functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name)
{