bpo-45324: Capture data in FrozenImporter.find_spec() to use in exec_module(). (gh-28633)

Before this change we end up duplicating effort and throwing away data in FrozenImporter.find_spec().  Now we do the work once in find_spec() and the only thing we do in FrozenImporter.exec_module() is turn the raw frozen data into a code object and then exec it.

We've added _imp.find_frozen(), add an arg to _imp.get_frozen_object(), and updated FrozenImporter.  We've also moved some code around to reduce duplication, get a little more consistency in outcomes, and be more efficient.

Note that this change is mostly necessary if we want to set __file__ on frozen stdlib modules. (See https://bugs.python.org/issue21736.)

https://bugs.python.org/issue45324
This commit is contained in:
Eric Snow 2021-10-05 10:01:27 -06:00 committed by GitHub
parent b9bb74871b
commit c3d9ac8b34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 331 additions and 120 deletions

View file

@ -169,33 +169,80 @@ exit:
return return_value;
}
PyDoc_STRVAR(_imp_get_frozen_object__doc__,
"get_frozen_object($module, name, /)\n"
PyDoc_STRVAR(_imp_find_frozen__doc__,
"find_frozen($module, name, /)\n"
"--\n"
"\n"
"Create a code object for a frozen module.");
"Return info about the corresponding frozen module (if there is one) or None.\n"
"\n"
"The returned info (a 2-tuple):\n"
"\n"
" * data the raw marshalled bytes\n"
" * is_package whether or not it is a package");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \
{"get_frozen_object", (PyCFunction)_imp_get_frozen_object, METH_O, _imp_get_frozen_object__doc__},
#define _IMP_FIND_FROZEN_METHODDEF \
{"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__},
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name);
_imp_find_frozen_impl(PyObject *module, PyObject *name);
static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *arg)
_imp_find_frozen(PyObject *module, PyObject *arg)
{
PyObject *return_value = NULL;
PyObject *name;
if (!PyUnicode_Check(arg)) {
_PyArg_BadArgument("get_frozen_object", "argument", "str", arg);
_PyArg_BadArgument("find_frozen", "argument", "str", arg);
goto exit;
}
if (PyUnicode_READY(arg) == -1) {
goto exit;
}
name = arg;
return_value = _imp_get_frozen_object_impl(module, name);
return_value = _imp_find_frozen_impl(module, name);
exit:
return return_value;
}
PyDoc_STRVAR(_imp_get_frozen_object__doc__,
"get_frozen_object($module, name, data=None, /)\n"
"--\n"
"\n"
"Create a code object for a frozen module.");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \
{"get_frozen_object", (PyCFunction)(void(*)(void))_imp_get_frozen_object, METH_FASTCALL, _imp_get_frozen_object__doc__},
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name,
PyObject *dataobj);
static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *name;
PyObject *dataobj = Py_None;
if (!_PyArg_CheckPositional("get_frozen_object", nargs, 1, 2)) {
goto exit;
}
if (!PyUnicode_Check(args[0])) {
_PyArg_BadArgument("get_frozen_object", "argument 1", "str", args[0]);
goto exit;
}
if (PyUnicode_READY(args[0]) == -1) {
goto exit;
}
name = args[0];
if (nargs < 2) {
goto skip_optional;
}
dataobj = args[1];
skip_optional:
return_value = _imp_get_frozen_object_impl(module, name, dataobj);
exit:
return return_value;
@ -498,4 +545,4 @@ exit:
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF
#define _IMP_EXEC_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
/*[clinic end generated code: output=96038c277119d6e3 input=a9049054013a1b77]*/
/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/

View file

@ -887,10 +887,6 @@ _imp__fix_co_filename_impl(PyObject *module, PyCodeObject *code,
}
/* Forward */
static const struct _frozen * find_frozen(PyObject *);
/* Helper to test for built-in module */
static int
@ -1119,75 +1115,125 @@ list_frozen_module_names()
return names;
}
static const struct _frozen *
find_frozen(PyObject *modname)
typedef enum {
FROZEN_OKAY,
FROZEN_BAD_NAME, // The given module name wasn't valid.
FROZEN_NOT_FOUND, // It wasn't in PyImport_FrozenModules.
FROZEN_DISABLED, // -X frozen_modules=off (and not essential)
FROZEN_EXCLUDED, // The PyImport_FrozenModules entry has NULL "code".
FROZEN_INVALID, // The PyImport_FrozenModules entry is bogus.
} frozen_status;
static inline void
set_frozen_error(frozen_status status, PyObject *modname)
{
if (modname == NULL) {
return NULL;
const char *err = NULL;
switch (status) {
case FROZEN_BAD_NAME:
case FROZEN_NOT_FOUND:
case FROZEN_DISABLED:
err = "No such frozen object named %R";
break;
case FROZEN_EXCLUDED:
err = "Excluded frozen object named %R";
break;
case FROZEN_INVALID:
err = "Frozen object named %R is invalid";
break;
case FROZEN_OKAY:
// There was no error.
break;
default:
Py_UNREACHABLE();
}
const char *name = PyUnicode_AsUTF8(modname);
if (err != NULL) {
PyObject *msg = PyUnicode_FromFormat(err, modname);
if (msg == NULL) {
PyErr_Clear();
}
PyErr_SetImportError(msg, modname, NULL);
Py_XDECREF(msg);
}
}
struct frozen_info {
PyObject *nameobj;
const char *data;
Py_ssize_t size;
bool is_package;
};
static frozen_status
find_frozen(PyObject *nameobj, struct frozen_info *info)
{
if (info != NULL) {
info->nameobj = NULL;
info->data = NULL;
info->size = 0;
info->is_package = false;
}
if (nameobj == NULL || nameobj == Py_None) {
return FROZEN_BAD_NAME;
}
const char *name = PyUnicode_AsUTF8(nameobj);
if (name == NULL) {
// Note that this function previously used
// _PyUnicode_EqualToASCIIString(). We clear the error here
// (instead of propagating it) to match the earlier behavior
// more closely.
PyErr_Clear();
return NULL;
return FROZEN_BAD_NAME;
}
if (!use_frozen() && !is_essential_frozen_module(name)) {
return NULL;
return FROZEN_DISABLED;
}
const struct _frozen *p;
for (p = PyImport_FrozenModules; ; p++) {
if (p->name == NULL) {
return NULL;
// We hit the end-of-list sentinel value.
return FROZEN_NOT_FOUND;
}
if (strcmp(name, p->name) == 0) {
break;
}
}
return p;
}
static PyObject *
get_frozen_object(PyObject *name)
{
const struct _frozen *p = find_frozen(name);
int size;
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
return NULL;
if (info != NULL) {
info->nameobj = nameobj; // borrowed
info->data = (const char *)p->code;
info->size = p->size < 0 ? -(p->size) : p->size;
info->is_package = p->size < 0 ? true : false;
}
if (p->code == NULL) {
PyErr_Format(PyExc_ImportError,
"Excluded frozen object named %R",
name);
return NULL;
/* It is frozen but marked as un-importable. */
return FROZEN_EXCLUDED;
}
size = p->size;
if (size < 0)
size = -size;
return PyMarshal_ReadObjectFromString((const char *)p->code, size);
if (p->code[0] == '\0' || p->size == 0) {
return FROZEN_INVALID;
}
return FROZEN_OKAY;
}
static PyObject *
is_frozen_package(PyObject *name)
unmarshal_frozen_code(struct frozen_info *info)
{
const struct _frozen *p = find_frozen(name);
int size;
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size);
if (co == NULL) {
set_frozen_error(FROZEN_INVALID, info->nameobj);
return NULL;
}
size = p->size;
if (size < 0)
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
if (!PyCode_Check(co)) {
// We stick with TypeError for backward compatibility.
PyErr_Format(PyExc_TypeError,
"frozen object %R is not a code object",
info->nameobj);
Py_DECREF(co);
return NULL;
}
return co;
}
@ -1200,35 +1246,25 @@ int
PyImport_ImportFrozenModuleObject(PyObject *name)
{
PyThreadState *tstate = _PyThreadState_GET();
const struct _frozen *p;
PyObject *co, *m, *d;
int ispackage;
int size;
p = find_frozen(name);
if (p == NULL)
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
return 0;
if (p->code == NULL) {
_PyErr_Format(tstate, PyExc_ImportError,
"Excluded frozen object named %R",
name);
}
else if (status == FROZEN_BAD_NAME) {
return 0;
}
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return -1;
}
size = p->size;
ispackage = (size < 0);
if (ispackage)
size = -size;
co = PyMarshal_ReadObjectFromString((const char *)p->code, size);
if (co == NULL)
co = unmarshal_frozen_code(&info);
if (co == NULL) {
return -1;
if (!PyCode_Check(co)) {
_PyErr_Format(tstate, PyExc_TypeError,
"frozen object %R is not a code object",
name);
goto err_return;
}
if (ispackage) {
if (info.is_package) {
/* Set __path__ to the empty list */
PyObject *l;
int err;
@ -1966,20 +2002,83 @@ _imp_init_frozen_impl(PyObject *module, PyObject *name)
return import_add_module(tstate, name);
}
/*[clinic input]
_imp.find_frozen
name: unicode
/
Return info about the corresponding frozen module (if there is one) or None.
The returned info (a 2-tuple):
* data the raw marshalled bytes
* is_package whether or not it is a package
[clinic start generated code]*/
static PyObject *
_imp_find_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/
{
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
Py_RETURN_NONE;
}
else if (status == FROZEN_BAD_NAME) {
Py_RETURN_NONE;
}
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
PyObject *data = PyBytes_FromStringAndSize(info.data, info.size);
if (data == NULL) {
return NULL;
}
PyObject *result = PyTuple_Pack(2, data,
info.is_package ? Py_True : Py_False);
Py_DECREF(data);
return result;
}
/*[clinic input]
_imp.get_frozen_object
name: unicode
data as dataobj: object = None
/
Create a code object for a frozen module.
[clinic start generated code]*/
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=2568cc5b7aa0da63 input=ed689bc05358fdbd]*/
_imp_get_frozen_object_impl(PyObject *module, PyObject *name,
PyObject *dataobj)
/*[clinic end generated code: output=54368a673a35e745 input=034bdb88f6460b7b]*/
{
return get_frozen_object(name);
struct frozen_info info;
if (PyBytes_Check(dataobj)) {
info.nameobj = name;
info.data = PyBytes_AS_STRING(dataobj);
info.size = PyBytes_Size(dataobj);
if (info.size == 0) {
set_frozen_error(FROZEN_INVALID, name);
return NULL;
}
}
else if (dataobj != Py_None) {
_PyArg_BadArgument("get_frozen_object", "argument 2", "bytes", dataobj);
return NULL;
}
else {
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
}
return unmarshal_frozen_code(&info);
}
/*[clinic input]
@ -1995,7 +2094,13 @@ static PyObject *
_imp_is_frozen_package_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=e70cbdb45784a1c9 input=81b6cdecd080fbb8]*/
{
return is_frozen_package(name);
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY && status != FROZEN_EXCLUDED) {
set_frozen_error(status, name);
return NULL;
}
return PyBool_FromLong(info.is_package);
}
/*[clinic input]
@ -2027,10 +2132,12 @@ static PyObject *
_imp_is_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=01f408f5ec0f2577 input=7301dbca1897d66b]*/
{
const struct _frozen *p;
p = find_frozen(name);
return PyBool_FromLong((long) (p == NULL ? 0 : p->size));
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY) {
Py_RETURN_FALSE;
}
Py_RETURN_TRUE;
}
/*[clinic input]
@ -2221,6 +2328,7 @@ static PyMethodDef imp_methods[] = {
_IMP_LOCK_HELD_METHODDEF
_IMP_ACQUIRE_LOCK_METHODDEF
_IMP_RELEASE_LOCK_METHODDEF
_IMP_FIND_FROZEN_METHODDEF
_IMP_GET_FROZEN_OBJECT_METHODDEF
_IMP_IS_FROZEN_PACKAGE_METHODDEF
_IMP_CREATE_BUILTIN_METHODDEF