gh-116322: Enable the GIL while loading C extension modules (#118560)

Add the ability to enable/disable the GIL at runtime, and use that in the C module loading code. We can't know before running a module init function if it supports free-threading, so the GIL is temporarily enabled before doing so. If the module declares support for running without the GIL, the GIL is later disabled. Otherwise, the GIL is permanently enabled, and will never be disabled again for the life of the current interpreter.
2025-11-25 04:34:37 +00:00 · 2024-05-06 20:07:23 -07:00 · 2024-05-06 20:07:23 -07:00 · 853163d3b5
commit 853163d3b5
parent 60bd111844
9 changed files with 352 additions and 32 deletions
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@ -205,6 +205,16 @@ static void recreate_gil(struct _gil_runtime_state *gil)
 }
 #endif

+/* Low-level release of the GIL lock itself.
+
+   While holding gil->mutex, clears gil->locked and signals gil->cond so
+   that a thread blocked on that condition variable can proceed.  This
+   helper performs only the unlock-and-signal step: it does not look at
+   gil->enabled and does not touch gil->last_holder, so callers must do
+   any such bookkeeping themselves before calling it. */
+static void
+drop_gil_impl(struct _gil_runtime_state *gil)
+{
+    MUTEX_LOCK(gil->mutex);
+    _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
+    _Py_atomic_store_int_relaxed(&gil->locked, 0);
+    COND_SIGNAL(gil->cond);
+    MUTEX_UNLOCK(gil->mutex);
+}
+
 static void
 drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
 {
@ -220,7 +230,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)

    struct _gil_runtime_state *gil = ceval->gil;
 #ifdef Py_GIL_DISABLED
-    if (!gil->enabled) {
+    if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
        return;
    }
 #endif
@ -236,11 +246,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
        _Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate);
    }

-    MUTEX_LOCK(gil->mutex);
-    _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
-    _Py_atomic_store_int_relaxed(&gil->locked, 0);
-    COND_SIGNAL(gil->cond);
-    MUTEX_UNLOCK(gil->mutex);
+    drop_gil_impl(gil);

 #ifdef FORCE_SWITCHING
    /* We check tstate first in case we might be releasing the GIL for
@ -275,8 +281,10 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)

   The function saves errno at entry and restores its value at exit.

-   tstate must be non-NULL. */
-static void
+   tstate must be non-NULL.
+
+   Returns 1 if the GIL was acquired, or 0 if not. */
+static int
 take_gil(PyThreadState *tstate)
 {
    int err = errno;
@ -300,8 +308,8 @@ take_gil(PyThreadState *tstate)
    PyInterpreterState *interp = tstate->interp;
    struct _gil_runtime_state *gil = interp->ceval.gil;
 #ifdef Py_GIL_DISABLED
-    if (!gil->enabled) {
-        return;
+    if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
+        return 0;
    }
 #endif

@ -346,6 +354,17 @@ take_gil(PyThreadState *tstate)
        }
    }

+#ifdef Py_GIL_DISABLED
+    if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
+        // Another thread disabled the GIL between our check above and
+        // now. Don't take the GIL, signal any other waiting threads, and
+        // return 0.
+        COND_SIGNAL(gil->cond);
+        MUTEX_UNLOCK(gil->mutex);
+        return 0;
+    }
+#endif
+
 #ifdef FORCE_SWITCHING
    /* This mutex must be taken before modifying gil->last_holder:
       see drop_gil(). */
@ -387,6 +406,7 @@ take_gil(PyThreadState *tstate)
    MUTEX_UNLOCK(gil->mutex);

    errno = err;
+    return 1;
 }

 void _PyEval_SetSwitchInterval(unsigned long microseconds)
@ -451,7 +471,8 @@ init_own_gil(PyInterpreterState *interp, struct _gil_runtime_state *gil)
 {
    assert(!gil_created(gil));
 #ifdef Py_GIL_DISABLED
-    gil->enabled = _PyInterpreterState_GetConfig(interp)->enable_gil == _PyConfig_GIL_ENABLE;
+    const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+    gil->enabled = config->enable_gil == _PyConfig_GIL_ENABLE ? INT_MAX : 0;
 #endif
    create_gil(gil);
    assert(gil_created(gil));
@ -545,11 +566,11 @@ PyEval_ReleaseLock(void)
    drop_gil(tstate->interp, tstate);
 }

-void
+int
 _PyEval_AcquireLock(PyThreadState *tstate)
 {
    _Py_EnsureTstateNotNULL(tstate);
-    take_gil(tstate);
+    return take_gil(tstate);
 }

 void
@ -1011,6 +1032,117 @@ _PyEval_InitState(PyInterpreterState *interp)
    _gil_initialize(&interp->_gil);
 }

+#ifdef Py_GIL_DISABLED
+/* Register one transient request to run with the GIL enabled.
+
+   gil->enabled acts as a counter: 0 means the GIL is disabled, INT_MAX
+   means it is enabled permanently, and any value in between is the number
+   of outstanding transient requests.  Each request that increments the
+   counter is expected to be undone later by _PyEval_DisableGIL() or
+   superseded by _PyEval_EnableGILPermanent().
+
+   Does nothing and returns 0 when the configuration forces a GIL setting
+   (enable_gil != _PyConfig_GIL_DEFAULT) or when the GIL is already
+   permanently enabled.
+
+   Returns 1 if this call is the one that moved the GIL from disabled to
+   enabled, and 0 otherwise. */
+int
+_PyEval_EnableGILTransient(PyThreadState *tstate)
+{
+    const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp);
+    if (config->enable_gil != _PyConfig_GIL_DEFAULT) {
+        return 0;
+    }
+    struct _gil_runtime_state *gil = tstate->interp->ceval.gil;
+
+    int enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
+    if (enabled == INT_MAX) {
+        // The GIL is already enabled permanently.
+        return 0;
+    }
+    if (enabled == INT_MAX - 1) {
+        Py_FatalError("Too many transient requests to enable the GIL");
+    }
+    if (enabled > 0) {
+        // If enabled is nonzero, we know we hold the GIL. This means that no
+        // other threads are attached, and nobody else can be concurrently
+        // mutating it.
+        _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1);
+        return 0;
+    }
+
+    // Enabling the GIL changes what it means to be an "attached" thread. To
+    // safely make this transition, we:
+    // 1. Detach the current thread.
+    // 2. Stop the world to detach (and suspend) all other threads.
+    // 3. Enable the GIL, if nobody else did between our check above and when
+    //    our stop-the-world begins.
+    // 4. Start the world.
+    // 5. Attach the current thread. Other threads may attach and hold the GIL
+    //    before this thread, which is harmless.
+    _PyThreadState_Detach(tstate);
+
+    // This could be an interpreter-local stop-the-world in situations where we
+    // know that this interpreter's GIL is not shared, and that it won't become
+    // shared before the stop-the-world begins. For now, we always stop all
+    // interpreters for simplicity.
+    _PyEval_StopTheWorldAll(&_PyRuntime);
+
+    // The counter may have changed while we were detached: another thread
+    // can have enabled the GIL transiently, or even permanently. Apply the
+    // same limits as above to the fresh value so that `enabled + 1` can
+    // neither overflow nor collide with the INT_MAX "permanent" sentinel.
+    enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
+    if (enabled == INT_MAX - 1) {
+        Py_FatalError("Too many transient requests to enable the GIL");
+    }
+    int this_thread_enabled = enabled == 0;
+    if (enabled != INT_MAX) {
+        _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1);
+    }
+    // else: the GIL became permanently enabled in the meantime; leave the
+    // sentinel untouched, exactly as the early-return path above does.
+
+    _PyEval_StartTheWorldAll(&_PyRuntime);
+    _PyThreadState_Attach(tstate);
+
+    return this_thread_enabled;
+}
+
+/* Pin the GIL in the enabled state by storing the INT_MAX sentinel in
+   gil->enabled.
+
+   The calling thread must already hold the GIL (checked by assertion).
+   Nothing happens when the configuration forces a GIL setting
+   (enable_gil != _PyConfig_GIL_DEFAULT).
+
+   Returns 1 if this call performed the switch to "permanently enabled",
+   or 0 if no change was made. */
+int
+_PyEval_EnableGILPermanent(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    if (_PyInterpreterState_GetConfig(interp)->enable_gil
+            != _PyConfig_GIL_DEFAULT) {
+        return 0;
+    }
+
+    struct _gil_runtime_state *gil_state = interp->ceval.gil;
+    assert(current_thread_holds_gil(gil_state, tstate));
+
+    // Only write the sentinel when it is not there yet; the result tells
+    // the caller whether this call was the one that made the change.
+    int newly_permanent =
+        _Py_atomic_load_int_relaxed(&gil_state->enabled) != INT_MAX;
+    if (newly_permanent) {
+        _Py_atomic_store_int_relaxed(&gil_state->enabled, INT_MAX);
+    }
+    return newly_permanent;
+}
+
+/* Undo one transient request to enable the GIL.
+
+   Decrements the gil->enabled counter.  When the counter reaches 0 the GIL
+   becomes disabled: the lock is released with drop_gil_impl() and 1 is
+   returned.
+
+   The calling thread must hold the GIL (checked by assertion).  Nothing
+   happens, and 0 is returned, when the configuration forces a GIL setting
+   (enable_gil != _PyConfig_GIL_DEFAULT) or when the GIL is permanently
+   enabled (gil->enabled == INT_MAX).  0 is also returned when other
+   transient requests are still outstanding after the decrement. */
+int
+_PyEval_DisableGIL(PyThreadState *tstate)
+{
+    const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp);
+    if (config->enable_gil != _PyConfig_GIL_DEFAULT) {
+        return 0;
+    }
+
+    struct _gil_runtime_state *gil = tstate->interp->ceval.gil;
+    assert(current_thread_holds_gil(gil, tstate));
+
+    int enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
+    if (enabled == INT_MAX) {
+        // Permanently enabled: there is no transient request to undo.
+        return 0;
+    }
+
+    // A transient request must be outstanding for this call to be valid.
+    assert(enabled >= 1);
+    enabled--;
+
+    // Disabling the GIL is much simpler than enabling it, since we know we are
+    // the only attached thread. Other threads may start free-threading as soon
+    // as this store is complete, if it sets gil->enabled to 0.
+    _Py_atomic_store_int_relaxed(&gil->enabled, enabled);
+
+    if (enabled == 0) {
+        // We're attached, so we know the GIL will remain disabled until at
+        // least the next time we detach, which must be after this function
+        // returns.
+        //
+        // Drop the GIL, which will wake up any threads waiting in take_gil()
+        // and let them resume execution without the GIL.
+        drop_gil_impl(gil);
+        return 1;
+    }
+    return 0;
+}
+#endif
+

 /* Do periodic things, like check for signals and async I/0.
 * We need to do reasonably frequently, but not too frequently.
--- a/Python/import.c
+++ b/Python/import.c
@ -1,6 +1,7 @@
 /* Module definition and import implementation */

 #include "Python.h"
+#include "pycore_ceval.h"
 #include "pycore_hashtable.h"     // _Py_hashtable_new_full()
 #include "pycore_import.h"        // _PyImport_BootstrapImp()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
@ -1023,6 +1024,12 @@ struct extensions_cache_value {
    struct cached_m_dict _m_dict;

    _Py_ext_module_origin origin;
+
+#ifdef Py_GIL_DISABLED
+    /* The module's md_gil slot, for legacy modules that are reinitialized from
+       m_dict rather than calling their initialization function again. */
+    void *md_gil;
+#endif
 };

 static struct extensions_cache_value *
@ -1351,7 +1358,7 @@ static struct extensions_cache_value *
 _extensions_cache_set(PyObject *path, PyObject *name,
                      PyModuleDef *def, PyModInitFunction m_init,
                      Py_ssize_t m_index, PyObject *m_dict,
-                      _Py_ext_module_origin origin)
+                      _Py_ext_module_origin origin, void *md_gil)
 {
    struct extensions_cache_value *value = NULL;
    void *key = NULL;
@ -1401,7 +1408,13 @@ _extensions_cache_set(PyObject *path, PyObject *name,
        .m_index=m_index,
        /* m_dict is set by set_cached_m_dict(). */
        .origin=origin,
+#ifdef Py_GIL_DISABLED
+        .md_gil=md_gil,
+#endif
    };
+#ifndef Py_GIL_DISABLED
+    (void)md_gil;
+#endif
    if (init_cached_m_dict(newvalue, m_dict) < 0) {
        goto finally;
    }
@ -1526,6 +1539,47 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name)
    return 0;
 }

+#ifdef Py_GIL_DISABLED
+/* Settle the GIL state after an extension module's init has run.
+
+   `module` is the result of loading the extension and may be NULL.  If it
+   is NULL, or it is a module object whose md_gil slot is not
+   Py_MOD_GIL_USED, the transient GIL request is undone with
+   _PyEval_DisableGIL().  Otherwise the GIL is enabled permanently; a
+   RuntimeWarning naming `module_name` is issued if this call is the one
+   that made the change, and a note is written to stderr in verbose mode.
+
+   Returns 0 on success, or the negative result of PyErr_WarnFormat() if
+   issuing the warning failed. */
+int
+_PyImport_CheckGILForModule(PyObject* module, PyObject *module_name)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+
+    // Anything that is not a real module object, or that has not opted out
+    // of the GIL through its md_gil slot, is treated as requiring the GIL.
+    int requires_gil = 0;
+    if (module != NULL) {
+        requires_gil = !PyModule_Check(module)
+            || ((PyModuleObject *)module)->md_gil == Py_MOD_GIL_USED;
+    }
+
+    if (!requires_gil) {
+        _PyEval_DisableGIL(tstate);
+        return 0;
+    }
+
+    if (_PyEval_EnableGILPermanent(tstate)) {
+        // Warn only when this call actually enabled the GIL.
+        int rc = PyErr_WarnFormat(
+            PyExc_RuntimeWarning,
+            1,
+            "The global interpreter lock (GIL) has been enabled to load "
+            "module '%U', which has not declared that it can run safely "
+            "without the GIL. To override this behavior and keep the GIL "
+            "disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.",
+            module_name
+        );
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    const PyConfig *cfg = _PyInterpreterState_GetConfig(tstate->interp);
+    if (cfg->verbose && cfg->enable_gil == _PyConfig_GIL_DEFAULT) {
+        PySys_FormatStderr("# loading module '%U', which requires the GIL\n",
+                           module_name);
+    }
+    return 0;
+}
+#endif
+
 static PyObject *
 get_core_module_dict(PyInterpreterState *interp,
                     PyObject *name, PyObject *path)
@ -1625,6 +1679,7 @@ struct singlephase_global_update {
    Py_ssize_t m_index;
    PyObject *m_dict;
    _Py_ext_module_origin origin;
+    void *md_gil;
 };

 static struct extensions_cache_value *
@ -1683,7 +1738,7 @@ update_global_state_for_extension(PyThreadState *tstate,
 #endif
        cached = _extensions_cache_set(
                path, name, def, m_init, singlephase->m_index, m_dict,
-                singlephase->origin);
+                singlephase->origin, singlephase->md_gil);
        if (cached == NULL) {
            // XXX Ignore this error?  Doing so would effectively
            // mark the module as not loadable.
@ -1768,6 +1823,13 @@ reload_singlephase_extension(PyThreadState *tstate,
            Py_DECREF(mod);
            return NULL;
        }
+#ifdef Py_GIL_DISABLED
+        if (def->m_base.m_copy != NULL) {
+            // For non-core modules, fetch the GIL slot that was stored by
+            // import_run_extension().
+            ((PyModuleObject *)mod)->md_gil = cached->md_gil;
+        }
+#endif
        /* We can't set mod->md_def if it's missing,
         * because _PyImport_ClearModulesByIndex() might break
         * due to violating interpreter isolation.
@ -1921,6 +1983,9 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0,
            // cache is less reliable than it should be).
            .m_index=def->m_base.m_index,
            .origin=info->origin,
+#ifdef Py_GIL_DISABLED
+            .md_gil=((PyModuleObject *)mod)->md_gil,
+#endif
        };
        // gh-88216: Extensions and def->m_base.m_copy can be updated
        // when the extension module doesn't support sub-interpreters.
@ -2039,6 +2104,10 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name,
            /* We don't want def->m_base.m_copy populated. */
            .m_dict=NULL,
            .origin=_Py_ext_module_origin_CORE,
+#ifdef Py_GIL_DISABLED
+            /* Unused when m_dict == NULL. */
+            .md_gil=NULL,
+#endif
        };
        cached = update_global_state_for_extension(
                tstate, nameobj, nameobj, def, &singlephase);
@ -2128,9 +2197,23 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec)
        goto finally;
    }

+#ifdef Py_GIL_DISABLED
+    // This call (and the corresponding call to _PyImport_CheckGILForModule())
+    // would ideally be inside import_run_extension(). They are kept in the
+    // callers for now because that would complicate the control flow inside
+    // import_run_extension(). It should be possible to restructure
+    // import_run_extension() to address this.
+    _PyEval_EnableGILTransient(tstate);
+#endif
    /* Now load it. */
    mod = import_run_extension(
                    tstate, p0, &info, spec, get_modules_dict(tstate, true));
+#ifdef Py_GIL_DISABLED
+    if (_PyImport_CheckGILForModule(mod, info.name) < 0) {
+        Py_CLEAR(mod);
+        goto finally;
+    }
+#endif

 finally:
    _Py_ext_module_loader_info_clear(&info);
@ -4505,10 +4588,22 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file)
        goto finally;
    }

+#ifdef Py_GIL_DISABLED
+    // This call (and the corresponding call to _PyImport_CheckGILForModule())
+    // would ideally be inside import_run_extension(). They are kept in the
+    // callers for now because that would complicate the control flow inside
+    // import_run_extension(). It should be possible to restructure
+    // import_run_extension() to address this.
+    _PyEval_EnableGILTransient(tstate);
+#endif
    mod = import_run_extension(
                    tstate, p0, &info, spec, get_modules_dict(tstate, true));
-    if (mod == NULL) {
+#ifdef Py_GIL_DISABLED
+    if (_PyImport_CheckGILForModule(mod, info.name) < 0) {
+        Py_CLEAR(mod);
+        goto finally;
    }
+#endif

    // XXX Shouldn't this happen in the error cases too (i.e. in "finally")?
    if (fp) {
--- a/Python/pystate.c
+++ b/Python/pystate.c
@ -2057,19 +2057,36 @@ _PyThreadState_Attach(PyThreadState *tstate)
        Py_FatalError("non-NULL old thread state");
    }

-    _PyEval_AcquireLock(tstate);

-    // XXX assert(tstate_is_alive(tstate));
-    current_fast_set(&_PyRuntime, tstate);
-    tstate_activate(tstate);
+    while (1) {
+        int acquired_gil = _PyEval_AcquireLock(tstate);

-    if (!tstate_try_attach(tstate)) {
-        tstate_wait_attach(tstate);
-    }
+        // XXX assert(tstate_is_alive(tstate));
+        current_fast_set(&_PyRuntime, tstate);
+        tstate_activate(tstate);
+
+        if (!tstate_try_attach(tstate)) {
+            tstate_wait_attach(tstate);
+        }

 #ifdef Py_GIL_DISABLED
-    _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
+        if (_PyEval_IsGILEnabled(tstate) != acquired_gil) {
+            // The GIL was enabled between our call to _PyEval_AcquireLock()
+            // and when we attached (the GIL can't go from enabled to disabled
+            // here because only a thread holding the GIL can disable
+            // it). Detach and try again.
+            assert(!acquired_gil);
+            tstate_set_detached(tstate, _Py_THREAD_DETACHED);
+            tstate_deactivate(tstate);
+            current_fast_clear(&_PyRuntime);
+            continue;
+        }
+        _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
+#else
+        (void)acquired_gil;
 #endif
+        break;
+    }

    // Resume previous critical section. This acquires the lock(s) from the
    // top-most critical section.
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@ -2433,8 +2433,7 @@ sys__is_gil_enabled_impl(PyObject *module)
 /*[clinic end generated code: output=57732cf53f5b9120 input=7e9c47f15a00e809]*/
 {
 #ifdef Py_GIL_DISABLED
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    return interp->ceval.gil->enabled;
+    return _PyEval_IsGILEnabled(_PyThreadState_GET());
 #else
    return 1;
 #endif