mirror of
https://github.com/python/cpython.git
synced 2025-07-12 13:55:34 +00:00
gh-117953: Always Run Extension Init Func in Main Interpreter First (gh-118157)
This change makes sure all extension/builtin modules have their init function run first by the main interpreter before proceeding with import in the original interpreter (main or otherwise). This means when the import of a single-phase init module fails in an isolated subinterpreter, it won't tie any global state/callbacks to the subinterpreter.
This commit is contained in:
parent
1a23716d4b
commit
b2cd54a4fb
4 changed files with 217 additions and 70 deletions
|
@ -2454,10 +2454,6 @@ class SinglephaseInitTests(unittest.TestCase):
|
||||||
# Start fresh.
|
# Start fresh.
|
||||||
cls.clean_up()
|
cls.clean_up()
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def tearDownClass(cls):
|
|
||||||
restore__testsinglephase()
|
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
# Clean up the module.
|
# Clean up the module.
|
||||||
self.clean_up()
|
self.clean_up()
|
||||||
|
@ -3014,20 +3010,20 @@ class SinglephaseInitTests(unittest.TestCase):
|
||||||
# * alive in 0 interpreters
|
# * alive in 0 interpreters
|
||||||
# * module def in _PyRuntime.imports.extensions
|
# * module def in _PyRuntime.imports.extensions
|
||||||
# * mod init func ran for the first time (since reset)
|
# * mod init func ran for the first time (since reset)
|
||||||
# * m_copy is NULL (claered when the interpreter was destroyed)
|
# * m_copy is still set (owned by main interpreter)
|
||||||
# * module's global state was initialized, not reset
|
# * module's global state was initialized, not reset
|
||||||
|
|
||||||
# Use a subinterpreter that sticks around.
|
# Use a subinterpreter that sticks around.
|
||||||
loaded_interp1 = self.import_in_subinterp(interpid1)
|
loaded_interp1 = self.import_in_subinterp(interpid1)
|
||||||
self.check_common(loaded_interp1)
|
self.check_common(loaded_interp1)
|
||||||
self.check_semi_fresh(loaded_interp1, loaded_main, base)
|
self.check_copied(loaded_interp1, base)
|
||||||
|
|
||||||
# At this point:
|
# At this point:
|
||||||
# * alive in 1 interpreter (interp1)
|
# * alive in 1 interpreter (interp1)
|
||||||
# * module def still in _PyRuntime.imports.extensions
|
# * module def still in _PyRuntime.imports.extensions
|
||||||
# * mod init func ran for the second time (since reset)
|
# * mod init func did not run again
|
||||||
# * m_copy was copied from interp1 (was NULL)
|
# * m_copy was not changed
|
||||||
# * module's global state was updated, not reset
|
# * module's global state was not touched
|
||||||
|
|
||||||
# Use a subinterpreter while the previous one is still alive.
|
# Use a subinterpreter while the previous one is still alive.
|
||||||
loaded_interp2 = self.import_in_subinterp(interpid2)
|
loaded_interp2 = self.import_in_subinterp(interpid2)
|
||||||
|
@ -3038,8 +3034,8 @@ class SinglephaseInitTests(unittest.TestCase):
|
||||||
# * alive in 2 interpreters (interp1, interp2)
|
# * alive in 2 interpreters (interp1, interp2)
|
||||||
# * module def still in _PyRuntime.imports.extensions
|
# * module def still in _PyRuntime.imports.extensions
|
||||||
# * mod init func did not run again
|
# * mod init func did not run again
|
||||||
# * m_copy was copied from interp2 (was from interp1)
|
# * m_copy was not changed
|
||||||
# * module's global state was updated, not reset
|
# * module's global state was not touched
|
||||||
|
|
||||||
@requires_subinterpreters
|
@requires_subinterpreters
|
||||||
def test_basic_multiple_interpreters_reset_each(self):
|
def test_basic_multiple_interpreters_reset_each(self):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
from test.support import load_package_tests
|
from test.support import load_package_tests, Py_GIL_DISABLED
|
||||||
|
|
||||||
def load_tests(*args):
|
if not Py_GIL_DISABLED:
|
||||||
|
def load_tests(*args):
|
||||||
return load_package_tests(os.path.dirname(__file__), *args)
|
return load_package_tests(os.path.dirname(__file__), *args)
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
When a builtin or extension module is imported for the first time, while a
|
||||||
|
subinterpreter is active, the module's init function is now run by the main
|
||||||
|
interpreter first before import continues in the subinterpreter.
|
||||||
|
Consequently, single-phase init modules now fail in an isolated
|
||||||
|
subinterpreter without the init function running under that interpreter,
|
||||||
|
whereas before it would run under the subinterpreter *before* failing,
|
||||||
|
potentially leaving behind global state and callbacks and otherwise leaving
|
||||||
|
the module in an inconsistent state.
|
194
Python/import.c
194
Python/import.c
|
@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// XXX We may want to make the copy immortal.
|
// XXX We may want to make the copy immortal.
|
||||||
// XXX This incref shouldn't be necessary. We are decref'ing
|
|
||||||
// one to many times somewhere.
|
|
||||||
Py_INCREF(copied);
|
|
||||||
|
|
||||||
value->_m_dict = (struct cached_m_dict){
|
value->_m_dict = (struct cached_m_dict){
|
||||||
.copied=copied,
|
.copied=copied,
|
||||||
|
@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static PyThreadState *
|
||||||
|
switch_to_main_interpreter(PyThreadState *tstate)
|
||||||
|
{
|
||||||
|
if (_Py_IsMainInterpreter(tstate->interp)) {
|
||||||
|
return tstate;
|
||||||
|
}
|
||||||
|
PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main());
|
||||||
|
if (main_tstate == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
main_tstate->_whence = _PyThreadState_WHENCE_EXEC;
|
||||||
|
#ifndef NDEBUG
|
||||||
|
PyThreadState *old_tstate = PyThreadState_Swap(main_tstate);
|
||||||
|
assert(old_tstate == tstate);
|
||||||
|
#else
|
||||||
|
(void)PyThreadState_Swap(main_tstate);
|
||||||
|
#endif
|
||||||
|
return main_tstate;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
get_core_module_dict(PyInterpreterState *interp,
|
get_core_module_dict(PyInterpreterState *interp,
|
||||||
PyObject *name, PyObject *path)
|
PyObject *name, PyObject *path)
|
||||||
|
@ -1936,13 +1953,72 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
|
||||||
struct extensions_cache_value *cached = NULL;
|
struct extensions_cache_value *cached = NULL;
|
||||||
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
|
const char *name_buf = PyBytes_AS_STRING(info->name_encoded);
|
||||||
|
|
||||||
struct _Py_ext_module_loader_result res;
|
/* We cannot know if the module is single-phase init or
|
||||||
if (_PyImport_RunModInitFunc(p0, info, &res) < 0) {
|
* multi-phase init until after we call its init function. Even
|
||||||
/* We discard res.def. */
|
* in isolated interpreters (that do not support single-phase init),
|
||||||
assert(res.module == NULL);
|
* the init function will run without restriction. For multi-phase
|
||||||
_Py_ext_module_loader_result_apply_error(&res, name_buf);
|
* init modules that isn't a problem because the init function only
|
||||||
|
* runs PyModuleDef_Init() on the module's def and then returns it.
|
||||||
|
*
|
||||||
|
* However, for single-phase init the module's init function will
|
||||||
|
* create the module, create other objects (and allocate other
|
||||||
|
* memory), populate it and its module state, and initialze static
|
||||||
|
* types. Some modules store other objects and data in global C
|
||||||
|
* variables and register callbacks with the runtime/stdlib or
|
||||||
|
* even external libraries (which is part of why we can't just
|
||||||
|
* dlclose() the module in the error case). That's a problem
|
||||||
|
* for isolated interpreters since all of the above happens
|
||||||
|
* and only then * will the import fail. Memory will leak,
|
||||||
|
* callbacks will still get used, and sometimes there
|
||||||
|
* will be crashes (memory access violations
|
||||||
|
* and use-after-free).
|
||||||
|
*
|
||||||
|
* To put it another way, if the module is single-phase init
|
||||||
|
* then the import will probably break interpreter isolation
|
||||||
|
* and should fail ASAP. However, the module's init function
|
||||||
|
* will still get run. That means it may still store state
|
||||||
|
* in the shared-object/DLL address space (which never gets
|
||||||
|
* closed/cleared), including objects (e.g. static types).
|
||||||
|
* This is a problem for isolated subinterpreters since each
|
||||||
|
* has its own object allocator. If the loaded shared-object
|
||||||
|
* still holds a reference to an object after the corresponding
|
||||||
|
* interpreter has finalized then either we must let it leak
|
||||||
|
* or else any later use of that object by another interpreter
|
||||||
|
* (or across multiple init-fini cycles) will crash the process.
|
||||||
|
*
|
||||||
|
* To avoid all of that, we make sure the module's init function
|
||||||
|
* is always run first with the main interpreter active. If it was
|
||||||
|
* already the main interpreter then we can continue loading the
|
||||||
|
* module like normal. Otherwise, right after the init function,
|
||||||
|
* we take care of some import state bookkeeping, switch back
|
||||||
|
* to the subinterpreter, check for single-phase init,
|
||||||
|
* and then continue loading like normal. */
|
||||||
|
|
||||||
|
bool switched = false;
|
||||||
|
/* We *could* leave in place a legacy interpreter here
|
||||||
|
* (one that shares obmalloc/GIL with main interp),
|
||||||
|
* but there isn't a big advantage, we anticipate
|
||||||
|
* such interpreters will be increasingly uncommon,
|
||||||
|
* and the code is a bit simpler if we always switch
|
||||||
|
* to the main interpreter. */
|
||||||
|
PyThreadState *main_tstate = switch_to_main_interpreter(tstate);
|
||||||
|
if (main_tstate == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
else if (main_tstate != tstate) {
|
||||||
|
switched = true;
|
||||||
|
/* In the switched case, we could play it safe
|
||||||
|
* by getting the main interpreter's import lock here.
|
||||||
|
* It's unlikely to matter though. */
|
||||||
|
}
|
||||||
|
|
||||||
|
struct _Py_ext_module_loader_result res;
|
||||||
|
int rc = _PyImport_RunModInitFunc(p0, info, &res);
|
||||||
|
if (rc < 0) {
|
||||||
|
/* We discard res.def. */
|
||||||
|
assert(res.module == NULL);
|
||||||
|
}
|
||||||
|
else {
|
||||||
assert(!PyErr_Occurred());
|
assert(!PyErr_Occurred());
|
||||||
assert(res.err == NULL);
|
assert(res.err == NULL);
|
||||||
|
|
||||||
|
@ -1951,26 +2027,16 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
|
||||||
def = res.def;
|
def = res.def;
|
||||||
assert(def != NULL);
|
assert(def != NULL);
|
||||||
|
|
||||||
if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
|
/* Do anything else that should be done
|
||||||
assert_multiphase_def(def);
|
* while still using the main interpreter. */
|
||||||
assert(mod == NULL);
|
if (res.kind == _Py_ext_module_kind_SINGLEPHASE) {
|
||||||
mod = PyModule_FromDefAndSpec(def, spec);
|
|
||||||
if (mod == NULL) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
|
|
||||||
assert_singlephase_def(def);
|
|
||||||
assert(PyModule_Check(mod));
|
|
||||||
|
|
||||||
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Remember the filename as the __file__ attribute */
|
/* Remember the filename as the __file__ attribute */
|
||||||
if (info->filename != NULL) {
|
if (info->filename != NULL) {
|
||||||
if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) {
|
// XXX There's a refleak somewhere with the filename.
|
||||||
|
// Until we can track it down, we intern it.
|
||||||
|
PyObject *filename = Py_NewRef(info->filename);
|
||||||
|
PyUnicode_InternInPlace(&filename);
|
||||||
|
if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) {
|
||||||
PyErr_Clear(); /* Not important enough to report */
|
PyErr_Clear(); /* Not important enough to report */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2004,9 +2070,84 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
|
||||||
cached = update_global_state_for_extension(
|
cached = update_global_state_for_extension(
|
||||||
tstate, info->path, info->name, def, &singlephase);
|
tstate, info->path, info->name, def, &singlephase);
|
||||||
if (cached == NULL) {
|
if (cached == NULL) {
|
||||||
|
assert(PyErr_Occurred());
|
||||||
|
goto main_finally;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main_finally:
|
||||||
|
/* Switch back to the subinterpreter. */
|
||||||
|
if (switched) {
|
||||||
|
assert(main_tstate != tstate);
|
||||||
|
|
||||||
|
/* Handle any exceptions, which we cannot propagate directly
|
||||||
|
* to the subinterpreter. */
|
||||||
|
if (PyErr_Occurred()) {
|
||||||
|
if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
|
||||||
|
/* We trust it will be caught again soon. */
|
||||||
|
PyErr_Clear();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Printing the exception should be sufficient. */
|
||||||
|
PyErr_PrintEx(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Any module we got from the init function will have to be
|
||||||
|
* reloaded in the subinterpreter. */
|
||||||
|
Py_CLEAR(mod);
|
||||||
|
|
||||||
|
PyThreadState_Clear(main_tstate);
|
||||||
|
(void)PyThreadState_Swap(tstate);
|
||||||
|
PyThreadState_Delete(main_tstate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*****************************************************************/
|
||||||
|
/* At this point we are back to the interpreter we started with. */
|
||||||
|
/*****************************************************************/
|
||||||
|
|
||||||
|
/* Finally we handle the error return from _PyImport_RunModInitFunc(). */
|
||||||
|
if (rc < 0) {
|
||||||
|
_Py_ext_module_loader_result_apply_error(&res, name_buf);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (res.kind == _Py_ext_module_kind_MULTIPHASE) {
|
||||||
|
assert_multiphase_def(def);
|
||||||
|
assert(mod == NULL);
|
||||||
|
/* Note that we cheat a little by not repeating the calls
|
||||||
|
* to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */
|
||||||
|
mod = PyModule_FromDefAndSpec(def, spec);
|
||||||
|
if (mod == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(res.kind == _Py_ext_module_kind_SINGLEPHASE);
|
||||||
|
assert_singlephase_def(def);
|
||||||
|
|
||||||
|
if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
assert(!PyErr_Occurred());
|
||||||
|
|
||||||
|
if (switched) {
|
||||||
|
/* We switched to the main interpreter to run the init
|
||||||
|
* function, so now we will "reload" the module from the
|
||||||
|
* cached data using the original subinterpreter. */
|
||||||
|
assert(mod == NULL);
|
||||||
|
mod = reload_singlephase_extension(tstate, cached, info);
|
||||||
|
if (mod == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
assert(!PyErr_Occurred());
|
||||||
|
assert(PyModule_Check(mod));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(mod != NULL);
|
||||||
|
assert(PyModule_Check(mod));
|
||||||
|
|
||||||
/* Update per-interpreter import state. */
|
/* Update per-interpreter import state. */
|
||||||
PyObject *modules = get_modules_dict(tstate, true);
|
PyObject *modules = get_modules_dict(tstate, true);
|
||||||
if (finish_singlephase_extension(
|
if (finish_singlephase_extension(
|
||||||
|
@ -2015,6 +2156,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_Py_ext_module_loader_result_clear(&res);
|
_Py_ext_module_loader_result_clear(&res);
|
||||||
return mod;
|
return mod;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue