[3.14] gh-132775: Unrevert "Add _PyCode_VerifyStateless()" (gh-133625)

This reverts commit 3c73cf5 (gh-133497), which itself reverted
the original commit d270bb5 (gh-133221).

We reverted the original change due to failing Android tests.
The checks in _PyCode_CheckNoInternalState() were too strict,
so we've relaxed them.
This commit is contained in:
Eric Snow 2025-05-07 18:00:33 -06:00 committed by GitHub
parent 54c3aa1597
commit c39bc81b70
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 359 additions and 38 deletions

View file

@ -1955,12 +1955,130 @@ finally:
}
/* Report whether a code object is free of extra internal state.
 *
 * Returns 1 when co->co_extra is NULL.  Otherwise returns 0 and, when
 * p_errmsg is not NULL, stores a static explanatory string through it.
 * No exception is set by this function.
 */
int
_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg)
{
    // co_executors, co_instrumentation, and co_monitoring are deliberately
    // not inspected; they are essentially ephemeral.
    if (co->co_extra == NULL) {
        return 1;
    }
    if (p_errmsg != NULL) {
        *p_errmsg = "only basic code objects are supported";
    }
    return 0;
}
/* Report whether the given variable counts show no reliance on external
 * state (closures or globals).
 *
 * "counts" must already be populated by the caller; hidden locals are
 * asserted to be absent.  Returns 1 when nothing objectionable is found.
 * Otherwise returns 0 and, when p_errmsg is not NULL, stores a static
 * explanatory string through it.  No exception is set by this function.
 */
int
_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts,
                             const char **p_errmsg)
{
    assert(counts->locals.hidden.total == 0);

    const char *reason;
    if (counts->numfree > 0) {
        // Free variables mean it's a closure.
        reason = "closures not supported";
    }
    else if (counts->unbound.globals.numglobal > 0
             || (counts->unbound.globals.numbuiltin > 0
                 && counts->unbound.globals.numunknown > 0))
    {
        // Either a name is known to be a global, or there are unknown
        // names alongside known builtins.  On its own, numunknown is not
        // checked because globals and builtins cannot be distinguished
        // from each other here.
        reason = "globals not supported";
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Verify that a code object is "stateless": it has no extra internal
 * state and does not depend on closures or globals.
 *
 * Returns 0 when the code object passes every check.  Returns -1 when
 * computing the unbound-variable counts fails (the exception is presumably
 * set by _PyCode_SetUnboundVarCounts()), or with ValueError set when one
 * of the state checks rejects the code object.
 */
int
_PyCode_VerifyStateless(PyThreadState *tstate,
                        PyCodeObject *co, PyObject *globalnames,
                        PyObject *globalsns, PyObject *builtinsns)
{
    const char *reason = NULL;
    _PyCode_var_counts_t counts = {0};

    _PyCode_GetVarCounts(co, &counts);
    int res = _PyCode_SetUnboundVarCounts(tstate, co, &counts,
                                          globalnames, NULL,
                                          globalsns, builtinsns);
    if (res < 0) {
        return -1;
    }

    // The internal state constraints may be relaxed later
    // if they turn out to be a problem.
    if (!_PyCode_CheckNoInternalState(co, &reason)) {
        goto unsupported;
    }

    if (builtinsns != NULL) {
        // Bump the builtin count so that the external-state check fails
        // for globals, even if there aren't any actual builtins.
        counts.unbound.globals.numbuiltin += 1;
    }
    if (!_PyCode_CheckNoExternalState(co, &counts, &reason)) {
        goto unsupported;
    }

    // Note that co->co_flags & CO_NESTED is not checked for anything here.
    return 0;

unsupported:
    _PyErr_SetString(tstate, PyExc_ValueError, reason);
    return -1;
}
/* Report whether a code object belongs to a plain function, i.e. it is
 * not a generator, coroutine, iterable coroutine, or async generator.
 *
 * Returns 1 for a plain function.  Otherwise returns 0 and, when p_errmsg
 * is not NULL, stores a static explanatory string through it.
 * No exception is set by this function.
 */
int
_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg)
{
    const char *reason;

    // The order of the tests decides which message wins when
    // more than one flag is set.
    if (co->co_flags & CO_GENERATOR) {
        reason = "generators not supported";
    }
    else if (co->co_flags & (CO_COROUTINE | CO_ITERABLE_COROUTINE)) {
        reason = "coroutines not supported";
    }
    else if (co->co_flags & CO_ASYNC_GENERATOR) {
        reason = "generators not supported";
    }
    else {
        return 1;
    }

    if (p_errmsg != NULL) {
        *p_errmsg = reason;
    }
    return 0;
}
/* Here "value" means a non-None value, since a bare return is identical
* to returning None explicitly. Likewise a missing return statement
* at the end of the function is turned into "return None". */
static int
code_returns_only_none(PyCodeObject *co)
{
if (!_PyCode_CheckPureFunction(co, NULL)) {
return 0;
}
int len = (int)Py_SIZE(co);
assert(len > 0);
// The last instruction either returns or raises. We can take advantage
// of that for a quick exit.
_Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1);
// Look up None in co_consts.
Py_ssize_t nconsts = PyTuple_Size(co->co_consts);
int none_index = 0;
@ -1971,27 +2089,43 @@ code_returns_only_none(PyCodeObject *co)
}
if (none_index == nconsts) {
// None wasn't there, which means there was no implicit return,
// "return", or "return None". That means there must be
// an explicit return (non-None).
return 0;
}
// "return", or "return None".
// Walk the bytecode, looking for RETURN_VALUE.
Py_ssize_t len = Py_SIZE(co);
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
assert(i != 0);
// Ignore it if it returns None.
_Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
if (prev.op.code == LOAD_CONST) {
// We don't worry about EXTENDED_ARG for now.
if (prev.op.arg == none_index) {
continue;
}
}
// That means there must be
// an explicit return (non-None), or it only raises.
if (IS_RETURN_OPCODE(final.op.code)) {
// It was an explicit return (non-None).
return 0;
}
// It must end with a raise then. We still have to walk the
// bytecode to see if there's any explicit return (non-None).
assert(IS_RAISE_OPCODE(final.op.code));
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
// We already know it isn't returning None.
return 0;
}
}
// It must only raise.
}
else {
// Walk the bytecode, looking for RETURN_VALUE.
for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) {
_Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i);
if (IS_RETURN_OPCODE(inst.op.code)) {
assert(i != 0);
// Ignore it if it returns None.
_Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1);
if (prev.op.code == LOAD_CONST) {
// We don't worry about EXTENDED_ARG for now.
if (prev.op.arg == none_index) {
continue;
}
}
return 0;
}
}
}
return 1;
}

View file

@ -1,12 +1,14 @@
/* Function object implementation */
#include "Python.h"
#include "pycore_code.h" // _PyCode_VerifyStateless()
#include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_function.h" // _PyFunction_Vectorcall
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
@ -1240,6 +1242,58 @@ PyTypeObject PyFunction_Type = {
};
/* Verify that a function object is "stateless".
 *
 * The function is accepted only if:
 *   - its globals and builtins namespaces are each NULL or a dict,
 *   - it has no positional defaults (__defaults__),
 *   - it has no keyword-only defaults (__kwdefaults__),
 *   - it has no closure cells (__closure__), and
 *   - its code object passes _PyCode_VerifyStateless().
 *
 * "func" must be a function object and no exception may be pending
 * on entry (both are asserted).
 *
 * Returns 0 on success.  Returns -1 with an exception set on failure:
 * TypeError for a namespace or defaults object of an unsupported type,
 * ValueError for a disallowed feature, or whatever a failing helper set.
 */
int
_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func)
{
    assert(!PyErr_Occurred());
    assert(PyFunction_Check(func));

    // Check the globals.
    PyObject *globalsns = PyFunction_GET_GLOBALS(func);
    if (globalsns != NULL && !PyDict_Check(globalsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported globals %R", globalsns);
        return -1;
    }

    // Check the builtins.
    PyObject *builtinsns = PyFunction_GET_BUILTINS(func);
    if (builtinsns != NULL && !PyDict_Check(builtinsns)) {
        _PyErr_Format(tstate, PyExc_TypeError,
                      "unsupported builtins %R", builtinsns);
        return -1;
    }

    // Disallow __defaults__.
    // A function's positional defaults are a tuple, not a dict, so the
    // tuple API must be used to measure them.  PyDict_Size() on a tuple
    // fails with -1, which would let defaults slip through this check
    // and leave a stray SystemError pending.
    PyObject *defaults = PyFunction_GET_DEFAULTS(func);
    if (defaults != NULL && defaults != Py_None) {
        if (!PyTuple_Check(defaults)) {
            _PyErr_Format(tstate, PyExc_TypeError,
                          "unsupported defaults %R", defaults);
            return -1;
        }
        if (PyTuple_GET_SIZE(defaults) > 0) {
            _PyErr_SetString(tstate, PyExc_ValueError,
                             "defaults not supported");
            return -1;
        }
    }

    // Disallow __kwdefaults__.
    PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func);
    if (kwdefaults != NULL && kwdefaults != Py_None) {
        Py_ssize_t nkwdefaults = PyDict_Size(kwdefaults);
        if (nkwdefaults < 0) {
            // Not a dict; PyDict_Size() already set the exception.
            return -1;
        }
        if (nkwdefaults > 0) {
            _PyErr_SetString(tstate, PyExc_ValueError,
                             "keyword defaults not supported");
            return -1;
        }
    }

    // Disallow __closure__.
    PyObject *closure = PyFunction_GET_CLOSURE(func);
    if (closure != NULL && closure != Py_None && PyTuple_GET_SIZE(closure) > 0)
    {
        _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported");
        return -1;
    }

    // Check the code.
    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
    if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) {
        return -1;
    }
    return 0;
}
static int
functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name)
{