gh-132775: Revert "gh-132775: Add _PyCode_VerifyStateless() (gh-133221)" (#133497)

This commit is contained in:
Petr Viktorin 2025-05-06 12:09:41 +02:00 committed by GitHub
parent fd37f1a8ad
commit 3c73cf51df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 37 additions and 362 deletions

View file

@ -619,47 +619,6 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts(
PyObject *globalsns, PyObject *globalsns,
PyObject *builtinsns); PyObject *builtinsns);
/* "Stateless" code is a function or code object which does not rely on
* external state or internal state. It may rely on arguments and
* builtins, but not globals or a closure. Thus it does not rely
* on __globals__ or __closure__, and a stateless function
* is equivalent to its code object.
*
* Stateless code also does not keep any persistent state
* of its own, so it can't have any executors, monitoring,
* instrumentation, or "extras" (i.e. co_extra).
*
* Stateless code may create nested functions, including closures.
* However, nested functions must themselves be stateless, except they
* *can* close on the enclosing locals.
*
* Stateless code may return any value, including nested functions and closures.
*
* Stateless code that takes no arguments and doesn't return anything
* may be treated like a script.
*
* We consider stateless code to be "portable" if it does not return
* any object that holds a reference to any of the code's locals. Thus
* generators and coroutines are not portable. Likewise a function
* that returns a closure is not portable. The concept of
* portability is useful in cases where the code is run
* in a different execution context than where
* the return value will be used. */
/* Return 1 if the code object has no executors, instrumentation,
 * monitoring data, or co_extra.  Otherwise return 0 and, if the second
 * argument is not NULL, store a static error message through it.
 * No exception is set either way. */
PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **);
/* Return 1 if the given variable counts show no free (closure) variables
 * and nothing that has to be treated as a global.  Otherwise return 0 and,
 * if the last argument is not NULL, store a static error message through
 * it.  No exception is set either way. */
PyAPI_FUNC(int) _PyCode_CheckNoExternalState(
PyCodeObject *,
_PyCode_var_counts_t *,
const char **);
/* Compute the variable counts for the code object and apply both of the
 * checks above.  Return 0 if the code is stateless and -1 otherwise;
 * a failed check is reported as ValueError. */
PyAPI_FUNC(int) _PyCode_VerifyStateless(
PyThreadState *,
PyCodeObject *,
PyObject *globalnames,
PyObject *globalsns,
PyObject *builtinsns);
/* Return 1 unless the code object's flags mark it as a generator,
 * coroutine, iterable coroutine, or async generator.  Otherwise return 0
 * and, if the second argument is not NULL, store a static error message
 * through it.  No exception is set either way. */
PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **);
PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *);

View file

@ -35,13 +35,6 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod
extern PyObject *_Py_set_function_type_params( extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params); PyThreadState* unused, PyObject *func, PyObject *type_params);
/* See pycore_code.h for explanation about what "stateless" means. */
PyAPI_FUNC(int)
_PyFunction_VerifyStateless(PyThreadState *, PyObject *);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -56,8 +56,6 @@ extern "C" {
#define IS_RETURN_OPCODE(opcode) \ #define IS_RETURN_OPCODE(opcode) \
(opcode == RETURN_VALUE) (opcode == RETURN_VALUE)
/* True if the opcode is one of the raising opcodes (RAISE_VARARGS or
 * RERAISE).  The argument is parenthesized so that an expression such as
 * `word & 0xff` is compared as a whole instead of being split up by
 * operator precedence.  It may be evaluated twice, so it must not have
 * side effects. */
#define IS_RAISE_OPCODE(opcode) \
        ((opcode) == RAISE_VARARGS || (opcode) == RERAISE)
/* Flags used in the oparg for MAKE_FUNCTION */ /* Flags used in the oparg for MAKE_FUNCTION */

View file

@ -178,32 +178,6 @@ FUNCTIONS = [
*NESTED_FUNCTIONS, *NESTED_FUNCTIONS,
] ]
# Functions that are expected to pass stateless verification when checked
# as function objects.  Every entry is also part of STATELESS_CODE, so the
# same functions are expected to pass when only their code object is checked.
STATELESS_FUNCTIONS = [
spam,
spam_minimal,
spam_with_builtins,
spam_args_attrs_and_builtins,
spam_returns_arg,
spam_annotated,
spam_with_inner_not_closure,
spam_with_inner_closure,
spam_N,
spam_C,
spam_NN,
spam_NC,
spam_CN,
spam_CC,
eggs_nested,
eggs_nested_N,
ham_nested,
ham_C_nested
]
# Functions whose code objects (func.__code__) are expected to pass
# stateless verification.  This is everything in STATELESS_FUNCTIONS plus
# two functions that are listed only here, i.e. they are not listed as
# stateless when checked as function objects.
STATELESS_CODE = [
*STATELESS_FUNCTIONS,
spam_with_globals_and_builtins,
spam_full,
]
# generators # generators

View file

@ -220,7 +220,6 @@ try:
import _testinternalcapi import _testinternalcapi
except ModuleNotFoundError: except ModuleNotFoundError:
_testinternalcapi = None _testinternalcapi = None
import test._code_definitions as defs
COPY_FREE_VARS = opmap['COPY_FREE_VARS'] COPY_FREE_VARS = opmap['COPY_FREE_VARS']
@ -672,6 +671,7 @@ class CodeTest(unittest.TestCase):
VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS VARARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_POS
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
import test._code_definitions as defs
funcs = { funcs = {
defs.spam_minimal: {}, defs.spam_minimal: {},
defs.spam_with_builtins: { defs.spam_with_builtins: {
@ -897,6 +897,7 @@ class CodeTest(unittest.TestCase):
}, },
} }
import test._code_definitions as defs
funcs = { funcs = {
defs.spam_minimal: new_var_counts(), defs.spam_minimal: new_var_counts(),
defs.spam_with_builtins: new_var_counts( defs.spam_with_builtins: new_var_counts(
@ -1024,35 +1025,42 @@ class CodeTest(unittest.TestCase):
counts = _testinternalcapi.get_code_var_counts(func.__code__) counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected) self.assertEqual(counts, expected)
func = defs.spam_with_globals_and_builtins def func_with_globals_and_builtins():
mod1 = _testinternalcapi
mod2 = dis
mods = (mod1, mod2)
checks = tuple(callable(m) for m in mods)
return callable(mod2), tuple(mods), list(mods), checks
func = func_with_globals_and_builtins
with self.subTest(f'{func} code'): with self.subTest(f'{func} code'):
expected = new_var_counts( expected = new_var_counts(
purelocals=5, purelocals=4,
globalvars=6, globalvars=5,
) )
counts = _testinternalcapi.get_code_var_counts(func.__code__) counts = _testinternalcapi.get_code_var_counts(func.__code__)
self.assertEqual(counts, expected) self.assertEqual(counts, expected)
with self.subTest(f'{func} with own globals and builtins'): with self.subTest(f'{func} with own globals and builtins'):
expected = new_var_counts( expected = new_var_counts(
purelocals=5, purelocals=4,
globalvars=(2, 4), globalvars=(2, 3),
) )
counts = _testinternalcapi.get_code_var_counts(func) counts = _testinternalcapi.get_code_var_counts(func)
self.assertEqual(counts, expected) self.assertEqual(counts, expected)
with self.subTest(f'{func} without globals'): with self.subTest(f'{func} without globals'):
expected = new_var_counts( expected = new_var_counts(
purelocals=5, purelocals=4,
globalvars=(0, 4, 2), globalvars=(0, 3, 2),
) )
counts = _testinternalcapi.get_code_var_counts(func, globalsns={}) counts = _testinternalcapi.get_code_var_counts(func, globalsns={})
self.assertEqual(counts, expected) self.assertEqual(counts, expected)
with self.subTest(f'{func} without both'): with self.subTest(f'{func} without both'):
expected = new_var_counts( expected = new_var_counts(
purelocals=5, purelocals=4,
globalvars=6, globalvars=5,
) )
counts = _testinternalcapi.get_code_var_counts(func, globalsns={}, counts = _testinternalcapi.get_code_var_counts(func, globalsns={},
builtinsns={}) builtinsns={})
@ -1060,34 +1068,12 @@ class CodeTest(unittest.TestCase):
with self.subTest(f'{func} without builtins'): with self.subTest(f'{func} without builtins'):
expected = new_var_counts( expected = new_var_counts(
purelocals=5, purelocals=4,
globalvars=(2, 0, 4), globalvars=(2, 0, 3),
) )
counts = _testinternalcapi.get_code_var_counts(func, builtinsns={}) counts = _testinternalcapi.get_code_var_counts(func, builtinsns={})
self.assertEqual(counts, expected) self.assertEqual(counts, expected)
@unittest.skipIf(_testinternalcapi is None, "missing _testinternalcapi")
def test_stateless(self):
    # Exercise _testinternalcapi.verify_stateless_code() against the
    # function lists in test._code_definitions.
    self.maxDiff = None

    # Everything listed as stateless code must verify via its code object.
    for stateless in defs.STATELESS_CODE:
        with self.subTest((stateless, '(code)')):
            _testinternalcapi.verify_stateless_code(stateless.__code__)

    # Everything listed as a stateless function must verify as a function.
    for stateless in defs.STATELESS_FUNCTIONS:
        with self.subTest((stateless, '(func)')):
            _testinternalcapi.verify_stateless_code(stateless)

    # Any other known function must be rejected in the matching form.
    for candidate in defs.FUNCTIONS:
        if candidate not in defs.STATELESS_CODE:
            with self.subTest((candidate, '(code)')), \
                    self.assertRaises(Exception):
                _testinternalcapi.verify_stateless_code(candidate.__code__)

        if candidate not in defs.STATELESS_FUNCTIONS:
            with self.subTest((candidate, '(func)')), \
                    self.assertRaises(Exception):
                _testinternalcapi.verify_stateless_code(candidate)
def isinterned(s): def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1]) return s is sys.intern(('_' + s + '_')[1:-1])

View file

@ -1165,47 +1165,6 @@ error:
return NULL; return NULL;
} }
/* Test helper exposed to Python as
 *
 *   verify_stateless_code(code, globalnames=..., globalsns=..., builtinsns=...)
 *
 * "code" must be a code object or a function.  The optional arguments, when
 * given, must be a set (globalnames) and dicts (globalsns, builtinsns).
 * For a function, its own globals and builtins are used for whichever
 * namespace was not passed explicitly, and its code object is what gets
 * verified.
 *
 * Returns None if _PyCode_VerifyStateless() accepts the code.  Returns NULL
 * with an exception set if argument parsing fails, if "code" has the wrong
 * type (TypeError), or if verification fails. */
static PyObject *
verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyThreadState *tstate = _PyThreadState_GET();
    PyObject *codearg;
    PyObject *globalnames = NULL;
    PyObject *globalsns = NULL;
    PyObject *builtinsns = NULL;
    static char *kwlist[] = {"code", "globalnames",
                             "globalsns", "builtinsns", NULL};
    // The text after ':' is the function name shown in argument-parsing
    // error messages, so it has to name this function.
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                    "O|O!O!O!:verify_stateless_code", kwlist,
                    &codearg, &PySet_Type, &globalnames,
                    &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns))
    {
        return NULL;
    }
    if (PyFunction_Check(codearg)) {
        // Explicitly passed namespaces take precedence over the function's.
        if (globalsns == NULL) {
            globalsns = PyFunction_GET_GLOBALS(codearg);
        }
        if (builtinsns == NULL) {
            builtinsns = PyFunction_GET_BUILTINS(codearg);
        }
        codearg = PyFunction_GET_CODE(codearg);
    }
    else if (!PyCode_Check(codearg)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be a code object or a function");
        return NULL;
    }
    PyCodeObject *code = (PyCodeObject *)codearg;

    if (_PyCode_VerifyStateless(
                tstate, code, globalnames, globalsns, builtinsns) < 0)
    {
        return NULL;
    }
    Py_RETURN_NONE;
}
#ifdef _Py_TIER2 #ifdef _Py_TIER2
static PyObject * static PyObject *
@ -2333,8 +2292,6 @@ static PyMethodDef module_functions[] = {
{"get_co_localskinds", get_co_localskinds, METH_O, NULL}, {"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts), {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
METH_VARARGS | METH_KEYWORDS, NULL}, METH_VARARGS | METH_KEYWORDS, NULL},
{"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code),
METH_VARARGS | METH_KEYWORDS, NULL},
#ifdef _Py_TIER2 #ifdef _Py_TIER2
{"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL},
{"invalidate_executors", invalidate_executors, METH_O, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL},

View file

@ -1955,134 +1955,12 @@ finally:
} }
/* Check that a code object carries no persistent state of its own.
 *
 * Returns 1 if the code object has no executors, no instrumentation,
 * no monitoring data and no co_extra.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static error message in *p_errmsg.
 * No exception is set. */
int
_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg)
{
    // Every kind of internal state is reported with the same message,
    // so the individual checks are folded into a single condition.
    int has_state = _PyCode_HAS_EXECUTORS(co)
                    || _PyCode_HAS_INSTRUMENTATION(co)
                    || co->_co_monitoring != NULL
                    || co->co_extra != NULL;
    if (!has_state) {
        return 1;
    }
    if (p_errmsg != NULL) {
        *p_errmsg = "only basic code objects are supported";
    }
    return 0;
}
/* Check, from already computed variable counts, that code does not depend
 * on a closure or on globals.
 *
 * Returns 1 if the counts show no free variables and nothing that has to
 * be treated as a global.  Otherwise returns 0 and, when p_errmsg is not
 * NULL, stores a static error message in *p_errmsg.  No exception is set. */
int
_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts,
                             const char **p_errmsg)
{
    assert(counts->locals.hidden.total == 0);

    const char *reason;
    if (counts->numfree > 0) {
        // Free variables mean the code is a closure.
        reason = "closures not supported";
    }
    else if (counts->unbound.globals.numglobal > 0
             || (counts->unbound.globals.numbuiltin > 0
                 && counts->unbound.globals.numunknown > 0))
    {
        // Unknown names are only rejected together with builtins; on their
        // own they are not checked, since globals and builtins cannot be
        // distinguished between here.
        reason = "globals not supported";
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Verify that a code object is stateless: no internal state and no
 * dependency on a closure or globals.
 *
 * globalnames, globalsns and builtinsns are passed through to
 * _PyCode_SetUnboundVarCounts() to classify the unbound names.
 *
 * Returns 0 if the code is stateless.  Returns -1 if the unbound counts
 * could not be computed, or with ValueError set if one of the checks
 * fails. */
int
_PyCode_VerifyStateless(PyThreadState *tstate,
                        PyCodeObject *co, PyObject *globalnames,
                        PyObject *globalsns, PyObject *builtinsns)
{
    const char *reason;
    _PyCode_var_counts_t var_counts = {0};

    _PyCode_GetVarCounts(co, &var_counts);
    int rc = _PyCode_SetUnboundVarCounts(tstate, co, &var_counts,
                                         globalnames, NULL,
                                         globalsns, builtinsns);
    if (rc < 0) {
        return -1;
    }

    // The internal-state restrictions could be loosened later
    // should they turn out to be too strict.
    if (!_PyCode_CheckNoInternalState(co, &reason)) {
        _PyErr_SetString(tstate, PyExc_ValueError, reason);
        return -1;
    }

    if (builtinsns != NULL) {
        // Bump the builtin count so that the external-state check rejects
        // globals even when the code uses no builtins at all.
        var_counts.unbound.globals.numbuiltin += 1;
    }
    if (!_PyCode_CheckNoExternalState(co, &var_counts, &reason)) {
        _PyErr_SetString(tstate, PyExc_ValueError, reason);
        return -1;
    }

    // co->co_flags & CO_NESTED is deliberately not consulted here.
    return 0;
}
/* Check that a code object is for a plain function.
 *
 * Returns 1 unless co_flags marks the code as a generator, coroutine,
 * iterable coroutine or async generator.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static error message in *p_errmsg.
 * No exception is set. */
int
_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg)
{
    const char *reason;

    // The order matters when several flags are set: CO_GENERATOR wins,
    // then either coroutine flag, then CO_ASYNC_GENERATOR.
    if (co->co_flags & CO_GENERATOR) {
        reason = "generators not supported";
    }
    else if (co->co_flags & (CO_COROUTINE | CO_ITERABLE_COROUTINE)) {
        reason = "coroutines not supported";
    }
    else if (co->co_flags & CO_ASYNC_GENERATOR) {
        reason = "generators not supported";
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Here "value" means a non-None value, since a bare return is identical /* Here "value" means a non-None value, since a bare return is identical
* to returning None explicitly. Likewise a missing return statement * to returning None explicitly. Likewise a missing return statement
* at the end of the function is turned into "return None". */ * at the end of the function is turned into "return None". */
static int static int
code_returns_only_none(PyCodeObject *co) code_returns_only_none(PyCodeObject *co)
{ {
if (!_PyCode_CheckPureFunction(co, NULL)) {
return 0;
}
int len = (int)Py_SIZE(co);
assert(len > 0);
// The last instruction either returns or raises. We can take advantage
// of that for a quick exit.
_Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1);
// Look up None in co_consts. // Look up None in co_consts.
Py_ssize_t nconsts = PyTuple_Size(co->co_consts); Py_ssize_t nconsts = PyTuple_Size(co->co_consts);
int none_index = 0; int none_index = 0;
@ -2093,42 +1971,26 @@ code_returns_only_none(PyCodeObject *co)
} }
if (none_index == nconsts) { if (none_index == nconsts) {
// None wasn't there, which means there was no implicit return, // None wasn't there, which means there was no implicit return,
// "return", or "return None". // "return", or "return None". That means there must be
// an explicit return (non-None).
// That means there must be return 0;
// an explicit return (non-None), or it only raises.
if (IS_RETURN_OPCODE(final.op.code)) {
// It was an explicit return (non-None).
return 0;
}
// It must end with a raise then. We still have to walk the
// bytecode to see if there's any explicit return (non-None).
assert(IS_RAISE_OPCODE(final.op.code));
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
// We already know it isn't returning None.
return 0;
}
}
// It must only raise.
} }
else {
// Walk the bytecode, looking for RETURN_VALUE. // Walk the bytecode, looking for RETURN_VALUE.
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { Py_ssize_t len = Py_SIZE(co);
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
if (IS_RETURN_OPCODE(inst.op.code)) { _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
assert(i != 0); if (IS_RETURN_OPCODE(inst.op.code)) {
// Ignore it if it returns None. assert(i != 0);
_Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); // Ignore it if it returns None.
if (prev.op.code == LOAD_CONST) { _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
// We don't worry about EXTENDED_ARG for now. if (prev.op.code == LOAD_CONST) {
if (prev.op.arg == none_index) { // We don't worry about EXTENDED_ARG for now.
continue; if (prev.op.arg == none_index) {
} continue;
} }
return 0;
} }
return 0;
} }
} }
return 1; return 1;

View file

@ -1,14 +1,12 @@
/* Function object implementation */ /* Function object implementation */
#include "Python.h" #include "Python.h"
#include "pycore_code.h" // _PyCode_VerifyStateless()
#include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_function.h" // _PyFunction_Vectorcall #include "pycore_function.h" // _PyFunction_Vectorcall
#include "pycore_long.h" // _PyLong_GetOne() #include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h" #include "pycore_stats.h"
@ -1242,58 +1240,6 @@ PyTypeObject PyFunction_Type = {
}; };
/* Verify that a function object is "stateless" (see pycore_code.h for
 * what that means).
 *
 * The caller must pass a function object and must not have an exception
 * set.
 *
 * Returns 0 if the function is stateless.  Otherwise returns -1 with an
 * exception set:
 *   - TypeError if the function's globals or builtins are not dicts;
 *   - ValueError if the function has non-empty __defaults__,
 *     __kwdefaults__ or __closure__;
 *   - whatever _PyCode_VerifyStateless() sets for the code object. */
int
_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func)
{
    assert(!PyErr_Occurred());
    assert(PyFunction_Check(func));

    // Check the globals.
    PyObject *globalsns = PyFunction_GET_GLOBALS(func);
    if (globalsns != NULL && !PyDict_Check(globalsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported globals %R", globalsns);
        return -1;
    }
    // Check the builtins.
    PyObject *builtinsns = PyFunction_GET_BUILTINS(func);
    if (builtinsns != NULL && !PyDict_Check(builtinsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported builtins %R", builtinsns);
        return -1;
    }
    // Disallow __defaults__.
    // A function's defaults are a tuple (or NULL), so the size has to be
    // read with the tuple API.  PyDict_Size() on a tuple fails and returns
    // -1, which made this check pass for every function and left an
    // exception set behind.
    PyObject *defaults = PyFunction_GET_DEFAULTS(func);
    if (defaults != NULL && defaults != Py_None) {
        assert(PyTuple_Check(defaults));
        if (PyTuple_GET_SIZE(defaults) > 0) {
            _PyErr_SetString(tstate, PyExc_ValueError,
                             "defaults not supported");
            return -1;
        }
    }
    // Disallow __kwdefaults__.
    PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func);
    if (kwdefaults != NULL && kwdefaults != Py_None
            && PyDict_Size(kwdefaults) > 0)
    {
        _PyErr_SetString(tstate, PyExc_ValueError,
                         "keyword defaults not supported");
        return -1;
    }
    // Disallow __closure__.
    PyObject *closure = PyFunction_GET_CLOSURE(func);
    if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0)
    {
        _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported");
        return -1;
    }
    // Check the code.
    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
    if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) {
        return -1;
    }
    return 0;
}
static int static int
functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name) functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name)
{ {