[3.14] gh-132775: Add _PyCode_GetScriptXIData() (gh-133676)

This converts functions, code, str, bytes, bytearray, and memoryview objects to PyCodeObject,
and ensures that the object looks like a script.  That means no args, no return, and no closure.
_PyCode_GetPureScriptXIData() takes it a step further and ensures there are no globals.

We also add _PyObject_SupportedAsScript() to the internal C-API.

(cherry picked from commit c81fa2b9cd, AKA gh-133480)

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
Miss Islington (bot) 2025-05-08 18:05:34 +02:00 committed by GitHub
parent dc1a4dda88
commit 1059548686
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 366 additions and 0 deletions

View file

@ -191,6 +191,14 @@ PyAPI_FUNC(int) _PyCode_GetXIData(
PyThreadState *,
PyObject *,
_PyXIData_t *);
/* Convert a script-like object (code, function, or source text) into
   cross-interpreter data.  The script may use globals.
   Returns 0 on success and -1 on failure. */
PyAPI_FUNC(int) _PyCode_GetScriptXIData(
        PyThreadState *,
        PyObject *,
        _PyXIData_t *);
/* Like _PyCode_GetScriptXIData(), but the script additionally must not
   use any globals.  Returns 0 on success and -1 on failure. */
PyAPI_FUNC(int) _PyCode_GetPureScriptXIData(
        PyThreadState *,
        PyObject *,
        _PyXIData_t *);
/* using cross-interpreter data */

View file

@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject(
PyObject *filename,
PyCompilerFlags *flags);
// Return 1 if the object is a str, bytes, or bytearray, or supports the
// buffer protocol; otherwise return 0.
extern int _PyObject_SupportedAsScript(PyObject *);
extern const char* _Py_SourceAsString(
PyObject *cmd,
const char *funcname,

View file

@ -1,4 +1,32 @@
# Minimal "script" fixture: no arguments, no locals, no globals, and only
# the implicit return.
def simple_script():
    assert True
# A busier "script" fixture that is still listed in PURE_SCRIPT_FUNCTIONS.
# It has several locals, defines and calls an inner function, and uses
# attribute access.  "pickle" is obtained through the __import__ builtin
# and bound to a local rather than brought in with an import statement.
# NOTE: tests inspect this function's local names and variable counts,
# so the statements themselves must not be reshaped.
def complex_script():
    obj = 'a string'
    pickle = __import__('pickle')
    def spam_minimal():
        pass
    spam_minimal()
    data = pickle.dumps(obj)
    res = pickle.loads(data)
    assert res == obj, (res, obj)
# Script-shaped (no arguments, no explicit return) but not "pure":
# spam is looked up as a global.  Listed in SCRIPT_FUNCTIONS but not in
# PURE_SCRIPT_FUNCTIONS.
def script_with_globals():
    obj1, obj2 = spam(42)
    assert obj1 == 42
    assert obj2 is None
# An explicit "return None" still counts as returning nothing; this
# function is listed in PURE_SCRIPT_FUNCTIONS.
def script_with_explicit_empty_return():
    return None
# Returns an actual value, so it is not script-like; it appears in
# STATELESS_FUNCTIONS but in neither SCRIPT_FUNCTIONS nor
# PURE_SCRIPT_FUNCTIONS.
def script_with_return():
    return True
def spam_minimal():
# no arg defaults or kwarg defaults
@ -141,6 +169,11 @@ ham_C_closure, *_ = eggs_closure_C(2)
TOP_FUNCTIONS = [
# shallow
simple_script,
complex_script,
script_with_globals,
script_with_explicit_empty_return,
script_with_return,
spam_minimal,
spam_with_builtins,
spam_with_globals_and_builtins,
@ -179,6 +212,10 @@ FUNCTIONS = [
]
STATELESS_FUNCTIONS = [
simple_script,
complex_script,
script_with_explicit_empty_return,
script_with_return,
spam,
spam_minimal,
spam_with_builtins,
@ -200,10 +237,26 @@ STATELESS_FUNCTIONS = [
]
# Everything in STATELESS_FUNCTIONS plus the functions listed explicitly
# below.
# NOTE(review): presumably the extra entries only qualify as stateless at
# the code-object level because the functions reference globals -- confirm.
STATELESS_CODE = [
    *STATELESS_FUNCTIONS,
    script_with_globals,
    spam_with_globals_and_builtins,
    spam_full,
]
# Functions intended to qualify as "pure" scripts: no arguments, no
# returned value, not a closure, and no use of globals.
PURE_SCRIPT_FUNCTIONS = [
    simple_script,
    complex_script,
    script_with_explicit_empty_return,
    spam_minimal,
    spam_with_builtins,
    spam_with_inner_not_closure,
    spam_with_inner_closure,
]
# All script-like functions: the pure ones plus those that also use
# globals.
SCRIPT_FUNCTIONS = [
    *PURE_SCRIPT_FUNCTIONS,
    script_with_globals,
    spam_with_globals_and_builtins,
]
# generators

View file

@ -673,6 +673,20 @@ class CodeTest(unittest.TestCase):
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
funcs = {
defs.simple_script: {},
defs.complex_script: {
'obj': CO_FAST_LOCAL,
'pickle': CO_FAST_LOCAL,
'spam_minimal': CO_FAST_LOCAL,
'data': CO_FAST_LOCAL,
'res': CO_FAST_LOCAL,
},
defs.script_with_globals: {
'obj1': CO_FAST_LOCAL,
'obj2': CO_FAST_LOCAL,
},
defs.script_with_explicit_empty_return: {},
defs.script_with_return: {},
defs.spam_minimal: {},
defs.spam_with_builtins: {
'x': CO_FAST_LOCAL,
@ -898,6 +912,19 @@ class CodeTest(unittest.TestCase):
}
funcs = {
defs.simple_script: new_var_counts(),
defs.complex_script: new_var_counts(
purelocals=5,
globalvars=1,
attrs=2,
),
defs.script_with_globals: new_var_counts(
purelocals=2,
globalvars=1,
),
defs.script_with_explicit_empty_return: new_var_counts(),
defs.script_with_return: new_var_counts(),
defs.spam_minimal: new_var_counts(),
defs.spam_minimal: new_var_counts(),
defs.spam_with_builtins: new_var_counts(
purelocals=4,

View file

@ -758,6 +758,126 @@ class CodeTests(_GetXIDataTests):
])
class PureShareableScriptTests(_GetXIDataTests):
    # Checks which objects are accepted as "pure" scripts: source text,
    # code objects and functions that are script-like and use no globals.

    # NOTE(review): presumably the base class passes MODE as the mode
    # when requesting cross-interpreter data -- confirm.
    MODE = 'script-pure'

    # Source text expected to be accepted as a script.
    # NOTE(review): as rendered here the multi-line literals have no
    # indentation after "if True:", which would not compile.  The
    # indentation was presumably lost in extraction -- confirm against
    # the real file before relying on these values.
    VALID_SCRIPTS = [
        '',
        'spam',
        '# a comment',
        'print("spam")',
        'raise Exception("spam")',
        """if True:
do_something()
""",
        """if True:
def spam(x):
return x
class Spam:
def eggs(self):
return 42
x = Spam().eggs()
raise ValueError(spam(x))
""",
    ]
    # Source text expected to be rejected because it does not compile.
    INVALID_SCRIPTS = [
        ' pass', # IndentationError
        '----', # SyntaxError
        """if True:
def spam():
# no body
spam()
""", # IndentationError
    ]

    # Valid source strings are expected to come back as code objects.
    def test_valid_str(self):
        self.assert_roundtrip_not_equal([
            *self.VALID_SCRIPTS,
        ], expecttype=types.CodeType)

    # Source strings that fail to compile are not shareable.
    def test_invalid_str(self):
        self.assert_not_shareable([
            *self.INVALID_SCRIPTS,
        ])

    # Same as test_valid_str, with the source UTF-8 encoded to bytes.
    def test_valid_bytes(self):
        self.assert_roundtrip_not_equal([
            *(s.encode('utf8') for s in self.VALID_SCRIPTS),
        ], expecttype=types.CodeType)

    # Same as test_invalid_str, with the source UTF-8 encoded to bytes.
    def test_invalid_bytes(self):
        self.assert_not_shareable([
            *(s.encode('utf8') for s in self.INVALID_SCRIPTS),
        ])

    # Code objects of pure script functions are accepted.
    def test_pure_script_code(self):
        self.assert_roundtrip_equal_not_identical([
            *(f.__code__ for f in defs.PURE_SCRIPT_FUNCTIONS),
        ])

    # Code objects of scripts that use globals are rejected in this mode.
    def test_impure_script_code(self):
        self.assert_not_shareable([
            *(f.__code__ for f in defs.SCRIPT_FUNCTIONS
              if f not in defs.PURE_SCRIPT_FUNCTIONS),
        ])

    # Code objects of anything that is not script-like are rejected.
    def test_other_code(self):
        self.assert_not_shareable([
            *(f.__code__ for f in defs.FUNCTIONS
              if f not in defs.SCRIPT_FUNCTIONS),
            *(f.__code__ for f in defs.FUNCTION_LIKE),
        ])

    # Pure script functions are accepted and come back as code objects.
    def test_pure_script_function(self):
        self.assert_roundtrip_not_equal([
            *defs.PURE_SCRIPT_FUNCTIONS,
        ], expecttype=types.CodeType)

    # Script functions that use globals are rejected in this mode.
    def test_impure_script_function(self):
        self.assert_not_shareable([
            *(f for f in defs.SCRIPT_FUNCTIONS
              if f not in defs.PURE_SCRIPT_FUNCTIONS),
        ])

    # Functions (and function-like objects) that are not script-like
    # are rejected.
    def test_other_function(self):
        self.assert_not_shareable([
            *(f for f in defs.FUNCTIONS
              if f not in defs.SCRIPT_FUNCTIONS),
            *defs.FUNCTION_LIKE,
        ])

    # Objects that are neither source text, code, nor functions are
    # rejected.
    def test_other_objects(self):
        self.assert_not_shareable([
            None,
            True,
            False,
            Ellipsis,
            NotImplemented,
            (),
            [],
            {},
            object(),
        ])
class ShareableScriptTests(PureShareableScriptTests):
    # Re-runs the inherited checks in 'script' mode.  The only difference
    # from the parent class is that scripts which use globals are now
    # expected to be accepted, so the two "impure" tests are overridden.

    MODE = 'script'

    def test_impure_script_code(self):
        impure_code = [
            func.__code__
            for func in defs.SCRIPT_FUNCTIONS
            if func not in defs.PURE_SCRIPT_FUNCTIONS
        ]
        self.assert_roundtrip_equal_not_identical(impure_code)

    def test_impure_script_function(self):
        impure_funcs = [
            func
            for func in defs.SCRIPT_FUNCTIONS
            if func not in defs.PURE_SCRIPT_FUNCTIONS
        ]
        self.assert_roundtrip_not_equal(
            impure_funcs,
            expecttype=types.CodeType,
        )
class ShareableTypeTests(_GetXIDataTests):
MODE = 'xidata'

View file

@ -1989,6 +1989,16 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs)
goto error;
}
}
else if (strcmp(mode, "script") == 0) {
if (_PyCode_GetScriptXIData(tstate, obj, xidata) != 0) {
goto error;
}
}
else if (strcmp(mode, "script-pure") == 0) {
if (_PyCode_GetPureScriptXIData(tstate, obj, xidata) != 0) {
goto error;
}
}
else {
PyErr_Format(PyExc_ValueError, "unsupported mode %R", modeobj);
goto error;

View file

@ -6,8 +6,10 @@
#include "osdefs.h" // MAXPATHLEN
#include "pycore_ceval.h" // _Py_simple_func
#include "pycore_crossinterp.h" // _PyXIData_t
#include "pycore_function.h" // _PyFunction_VerifyStateless()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_namespace.h" // _PyNamespace_New()
#include "pycore_pythonrun.h" // _Py_SourceAsString()
#include "pycore_typeobject.h" // _PyStaticType_InitBuiltin()
@ -784,6 +786,131 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata)
}
/* script wrapper */
/* Check that a code object has the shape of a script.
 *
 * A script is not a closure, takes no arguments of any kind, and never
 * returns anything other than None.  When "pure" is nonzero the
 * interpreter's builtins are handed to _PyCode_VerifyStateless() so that
 * use of globals is rejected as well.  When "checked" is nonzero the
 * caller has already established statelessness, so that check is only
 * repeated inside an assert().
 *
 * Returns 0 if "co" is a script, otherwise -1 with an exception set.
 */
static int
verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure)
{
    // Builtins are only needed for the (optional) globals check.
    PyObject *builtins = pure ? _PyEval_GetBuiltins(tstate) : NULL;
    assert(!pure || builtins != NULL);

    // It must not be a closure and (optionally) must not use globals.
    if (!checked) {
        if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) {
            return -1;
        }
    }
    else {
        assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0);
    }

    // It must not accept positional, keyword-only, or variadic arguments.
    const int takes_args =
        co->co_argcount > 0
        || co->co_posonlyargcount > 0
        || co->co_kwonlyargcount > 0
        || (co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) != 0;
    if (takes_args) {
        _PyErr_SetString(tstate, PyExc_ValueError,
                         "code with args not supported");
        return -1;
    }

    // It must not return anything but None.
    if (_PyCode_ReturnsOnlyNone(co)) {
        return 0;
    }
    _PyErr_SetString(tstate, PyExc_ValueError,
                     "code that returns a value is not a script");
    return -1;
}
/* Convert a script-like object into cross-interpreter data.
 *
 * "obj" may be a code object, a function, or source text accepted by
 * _Py_SourceAsString() (which is compiled here in Py_file_input mode).
 * The resulting code object has to pass verify_script(); "pure" is
 * forwarded to it to also forbid globals.
 *
 * Returns 0 on success and -1 with an exception set on failure.  Failures
 * detected before the final _PyCode_GetXIData() call are passed to
 * _set_xid_lookup_failure() with the message "object not a valid script"
 * and the original exception as the cause.
 */
static int
get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure,
                  _PyXIData_t *xidata)
{
    // The code object to share; a strong reference once set.
    PyObject *co = NULL;
    // Nonzero once the stateless check in verify_script() is redundant.
    int verified = 0;

    if (PyCode_Check(obj)) {
        co = obj;
        Py_INCREF(co);
    }
    else if (PyFunction_Check(obj)) {
        co = PyFunction_GET_CODE(obj);
        assert(co != NULL);
        Py_INCREF(co);
        if (pure) {
            // Checking the function covers its code object too.
            if (_PyFunction_VerifyStateless(tstate, obj) < 0) {
                goto error;
            }
            verified = 1;
        }
    }
    else {
        // Anything else is treated as source text and compiled.
        PyCompilerFlags flags = _PyCompilerFlags_INIT;
        flags.cf_flags = PyCF_SOURCE_IS_UTF8;
        PyObject *source_ref = NULL;
        const char *source = _Py_SourceAsString(
                obj, "???", "???", &flags, &source_ref);
        if (source == NULL) {
            if (!_PyObject_SupportedAsScript(obj)) {
                // The exception raised above is discarded in favor
                // of this one.
                _PyErr_Format(tstate, PyExc_TypeError,
                              "unsupported script %R", obj);
            }
            goto error;
        }
        co = Py_CompileStringExFlags(
                source, "<script>", Py_file_input, &flags, 0);
        // The source buffer is no longer needed once compilation is done.
        Py_XDECREF(source_ref);
        if (co == NULL) {
            goto error;
        }
        // Compiled text can't have args or any return statements,
        // nor be a closure.  It can use globals though, so the
        // stateless check is only still needed for the pure case.
        verified = !pure;
    }

    // Make sure it's actually a script.
    if (verify_script(tstate, (PyCodeObject *)co, verified, pure) < 0) {
        goto error;
    }

    // Convert the code object; its own failures are reported as-is.
    if (_PyCode_GetXIData(tstate, co, xidata) < 0) {
        Py_DECREF(co);
        return -1;
    }
    Py_DECREF(co);
    return 0;

error:
    Py_XDECREF(co);
    PyObject *exc = _PyErr_GetRaisedException(tstate);
    assert(exc != NULL);
    _set_xid_lookup_failure(
            tstate, NULL, "object not a valid script", exc);
    Py_DECREF(exc);
    return -1;
}
/* Get cross-interpreter data for a script-like object.
 * The script is allowed to use globals.
 * Returns 0 on success and -1 on failure. */
int
_PyCode_GetScriptXIData(PyThreadState *tstate,
                        PyObject *obj, _PyXIData_t *xidata)
{
    const int pure = 0;
    return get_script_xidata(tstate, obj, pure, xidata);
}
/* Get cross-interpreter data for a script-like object that
 * additionally must not use any globals.
 * Returns 0 on success and -1 on failure. */
int
_PyCode_GetPureScriptXIData(PyThreadState *tstate,
                            PyObject *obj, _PyXIData_t *xidata)
{
    const int pure = 1;
    return get_script_xidata(tstate, obj, pure, xidata);
}
/* using cross-interpreter data */
PyObject *

View file

@ -1524,6 +1524,26 @@ Py_CompileStringExFlags(const char *str, const char *filename_str, int start,
return co;
}
/* Report whether an object is of a kind that can hold script source.
 *
 * Returns 1 for str, bytes, and bytearray objects and for any object
 * that supports the buffer protocol; returns 0 for everything else.
 */
int
_PyObject_SupportedAsScript(PyObject *cmd)
{
    // Each "||" operand is evaluated in the same order as the original
    // chain of checks, and the result is always exactly 0 or 1.
    return PyUnicode_Check(cmd)
        || PyBytes_Check(cmd)
        || PyByteArray_Check(cmd)
        || PyObject_CheckBuffer(cmd);
}
const char *
_Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
{