gh-111924: Use PyMutex for Runtime-global Locks. (gh-112207)

This replaces some usages of PyThread_type_lock with PyMutex, which does not require memory allocation to initialize. This simplifies some of the runtime initialization and is also one step towards avoiding changing the default raw memory allocator during initialization/finalization, which can be non-thread-safe in some circumstances.
2025-10-21 22:22:48 +00:00 · 2023-12-07 14:33:40 -05:00 · 2023-12-07 14:33:40 -05:00 · cf6110ba13
commit cf6110ba13
parent db460735af
18 changed files with 97 additions and 241 deletions
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@ -589,9 +589,7 @@ _PyEval_ReInitThreads(PyThreadState *tstate)
    take_gil(tstate);

    struct _pending_calls *pending = &tstate->interp->ceval.pending;
-    if (_PyThread_at_fork_reinit(&pending->lock) < 0) {
-        return _PyStatus_ERR("Can't reinitialize pending calls lock");
-    }
+    _PyMutex_at_fork_reinit(&pending->mutex);

    /* Destroy all threads except the current one */
    _PyThreadState_DeleteExcept(tstate);
@ -720,13 +718,10 @@ _PyEval_AddPendingCall(PyInterpreterState *interp,
        assert(_Py_IsMainInterpreter(interp));
        pending = &_PyRuntime.ceval.pending_mainthread;
    }
-    /* Ensure that _PyEval_InitState() was called
-       and that _PyEval_FiniState() is not called yet. */
-    assert(pending->lock != NULL);

-    PyThread_acquire_lock(pending->lock, WAIT_LOCK);
+    PyMutex_Lock(&pending->mutex);
    int result = _push_pending_call(pending, func, arg, flags);
-    PyThread_release_lock(pending->lock);
+    PyMutex_Unlock(&pending->mutex);

    /* signal main loop */
    SIGNAL_PENDING_CALLS(interp);
@ -768,9 +763,9 @@ _make_pending_calls(struct _pending_calls *pending)
        int flags = 0;

        /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(pending->lock, WAIT_LOCK);
+        PyMutex_Lock(&pending->mutex);
        _pop_pending_call(pending, &func, &arg, &flags);
-        PyThread_release_lock(pending->lock);
+        PyMutex_Unlock(&pending->mutex);

        /* having released the lock, perform the callback */
        if (func == NULL) {
@ -795,7 +790,7 @@ make_pending_calls(PyInterpreterState *interp)

    /* Only one thread (per interpreter) may run the pending calls
       at once.  In the same way, we don't do recursive pending calls. */
-    PyThread_acquire_lock(pending->lock, WAIT_LOCK);
+    PyMutex_Lock(&pending->mutex);
    if (pending->busy) {
        /* A pending call was added after another thread was already
           handling the pending calls (and had already "unsignaled").
@ -807,11 +802,11 @@ make_pending_calls(PyInterpreterState *interp)
           care of any remaining pending calls.  Until then, though,
           all the interpreter's threads will be tripping the eval
           breaker every time it's checked. */
-        PyThread_release_lock(pending->lock);
+        PyMutex_Unlock(&pending->mutex);
        return 0;
    }
    pending->busy = 1;
-    PyThread_release_lock(pending->lock);
+    PyMutex_Unlock(&pending->mutex);

    /* unsignal before starting to call callbacks, so that any callback
       added in-between re-signals */
@ -892,23 +887,9 @@ Py_MakePendingCalls(void)
 }

 void
-_PyEval_InitState(PyInterpreterState *interp, PyThread_type_lock pending_lock)
+_PyEval_InitState(PyInterpreterState *interp)
 {
    _gil_initialize(&interp->_gil);
-
-    struct _pending_calls *pending = &interp->ceval.pending;
-    assert(pending->lock == NULL);
-    pending->lock = pending_lock;
-}
-
-void
-_PyEval_FiniState(struct _ceval_state *ceval)
-{
-    struct _pending_calls *pending = &ceval->pending;
-    if (pending->lock != NULL) {
-        PyThread_free_lock(pending->lock);
-        pending->lock = NULL;
-    }
 }


--- a/Python/crossinterp.c
+++ b/Python/crossinterp.c
@ -456,16 +456,17 @@ _xidregistry_clear(struct _xidregistry *xidregistry)
 static void
 _xidregistry_lock(struct _xidregistry *registry)
 {
-    if (registry->mutex != NULL) {
-        PyThread_acquire_lock(registry->mutex, WAIT_LOCK);
+    if (registry->global) {
+        PyMutex_Lock(&registry->mutex);
    }
+    // else: Within an interpreter we rely on the GIL instead of a separate lock.
 }

 static void
 _xidregistry_unlock(struct _xidregistry *registry)
 {
-    if (registry->mutex != NULL) {
-        PyThread_release_lock(registry->mutex);
+    if (registry->global) {
+        PyMutex_Unlock(&registry->mutex);
    }
 }

@ -874,19 +875,10 @@ _xidregistry_init(struct _xidregistry *registry)
    registry->initialized = 1;

    if (registry->global) {
-        // We manage the mutex lifecycle in pystate.c.
-        assert(registry->mutex != NULL);
-
        // Registering the builtins is cheap so we don't bother doing it lazily.
        assert(registry->head == NULL);
        _register_builtins_for_crossinterpreter_data(registry);
    }
-    else {
-        // Within an interpreter we rely on the GIL instead of a separate lock.
-        assert(registry->mutex == NULL);
-
-        // There's nothing else to initialize.
-    }
 }

 static void
@ -898,17 +890,6 @@ _xidregistry_fini(struct _xidregistry *registry)
    registry->initialized = 0;

    _xidregistry_clear(registry);
-
-    if (registry->global) {
-        // We manage the mutex lifecycle in pystate.c.
-        assert(registry->mutex != NULL);
-    }
-    else {
-        // There's nothing else to finalize.
-
-        // Within an interpreter we rely on the GIL instead of a separate lock.
-        assert(registry->mutex == NULL);
-    }
 }


--- a/Python/import.c
+++ b/Python/import.c
@ -418,11 +418,7 @@ remove_module(PyThreadState *tstate, PyObject *name)
 Py_ssize_t
 _PyImport_GetNextModuleIndex(void)
 {
-    PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK);
-    LAST_MODULE_INDEX++;
-    Py_ssize_t index = LAST_MODULE_INDEX;
-    PyThread_release_lock(EXTENSIONS.mutex);
-    return index;
+    return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1;
 }

 static const char *
@ -882,13 +878,13 @@ gets even messier.
 static inline void
 extensions_lock_acquire(void)
 {
-    PyThread_acquire_lock(_PyRuntime.imports.extensions.mutex, WAIT_LOCK);
+    PyMutex_Lock(&_PyRuntime.imports.extensions.mutex);
 }

 static inline void
 extensions_lock_release(void)
 {
-    PyThread_release_lock(_PyRuntime.imports.extensions.mutex);
+    PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex);
 }

 /* Magic for extension modules (built-in as well as dynamically
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@ -3056,13 +3056,13 @@ wait_for_thread_shutdown(PyThreadState *tstate)
 int Py_AtExit(void (*func)(void))
 {
    struct _atexit_runtime_state *state = &_PyRuntime.atexit;
-    PyThread_acquire_lock(state->mutex, WAIT_LOCK);
+    PyMutex_Lock(&state->mutex);
    if (state->ncallbacks >= NEXITFUNCS) {
-        PyThread_release_lock(state->mutex);
+        PyMutex_Unlock(&state->mutex);
        return -1;
    }
    state->callbacks[state->ncallbacks++] = func;
-    PyThread_release_lock(state->mutex);
+    PyMutex_Unlock(&state->mutex);
    return 0;
 }

@ -3072,18 +3072,18 @@ call_ll_exitfuncs(_PyRuntimeState *runtime)
    atexit_callbackfunc exitfunc;
    struct _atexit_runtime_state *state = &runtime->atexit;

-    PyThread_acquire_lock(state->mutex, WAIT_LOCK);
+    PyMutex_Lock(&state->mutex);
    while (state->ncallbacks > 0) {
        /* pop last function from the list */
        state->ncallbacks--;
        exitfunc = state->callbacks[state->ncallbacks];
        state->callbacks[state->ncallbacks] = NULL;

-        PyThread_release_lock(state->mutex);
+        PyMutex_Unlock(&state->mutex);
        exitfunc();
-        PyThread_acquire_lock(state->mutex, WAIT_LOCK);
+        PyMutex_Lock(&state->mutex);
    }
-    PyThread_release_lock(state->mutex);
+    PyMutex_Unlock(&state->mutex);

    fflush(stdout);
    fflush(stderr);
--- a/Python/pystate.c
+++ b/Python/pystate.c
@ -379,49 +379,23 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
 static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime);
 _Py_COMP_DIAG_POP

-#define NUMLOCKS 8
 #define LOCKS_INIT(runtime) \
    { \
        &(runtime)->interpreters.mutex, \
        &(runtime)->xi.registry.mutex, \
-        &(runtime)->unicode_state.ids.lock, \
+        &(runtime)->unicode_state.ids.mutex, \
        &(runtime)->imports.extensions.mutex, \
-        &(runtime)->ceval.pending_mainthread.lock, \
+        &(runtime)->ceval.pending_mainthread.mutex, \
        &(runtime)->atexit.mutex, \
        &(runtime)->audit_hooks.mutex, \
        &(runtime)->allocators.mutex, \
    }

-static int
-alloc_for_runtime(PyThread_type_lock locks[NUMLOCKS])
-{
-    /* Force default allocator, since _PyRuntimeState_Fini() must
-       use the same allocator than this function. */
-    PyMemAllocatorEx old_alloc;
-    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
-    for (int i = 0; i < NUMLOCKS; i++) {
-        PyThread_type_lock lock = PyThread_allocate_lock();
-        if (lock == NULL) {
-            for (int j = 0; j < i; j++) {
-                PyThread_free_lock(locks[j]);
-                locks[j] = NULL;
-            }
-            break;
-        }
-        locks[i] = lock;
-    }
-
-    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-    return 0;
-}
-
 static void
 init_runtime(_PyRuntimeState *runtime,
             void *open_code_hook, void *open_code_userdata,
             _Py_AuditHookEntry *audit_hook_head,
-             Py_ssize_t unicode_next_index,
-             PyThread_type_lock locks[NUMLOCKS])
+             Py_ssize_t unicode_next_index)
 {
    assert(!runtime->preinitializing);
    assert(!runtime->preinitialized);
@ -435,12 +409,6 @@ init_runtime(_PyRuntimeState *runtime,

    PyPreConfig_InitPythonConfig(&runtime->preconfig);

-    PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
-    for (int i = 0; i < NUMLOCKS; i++) {
-        assert(locks[i] != NULL);
-        *lockptrs[i] = locks[i];
-    }
-
    // Set it to the ID of the main thread of the main interpreter.
    runtime->main_thread = PyThread_get_thread_ident();

@ -466,11 +434,6 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
    // is called multiple times.
    Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index;

-    PyThread_type_lock locks[NUMLOCKS];
-    if (alloc_for_runtime(locks) != 0) {
-        return _PyStatus_NO_MEMORY();
-    }
-
    if (runtime->_initialized) {
        // Py_Initialize() must be running again.
        // Reset to _PyRuntimeState_INIT.
@ -489,7 +452,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
    }

    init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
-                 unicode_next_index, locks);
+                 unicode_next_index);

    return _PyStatus_OK();
 }
@ -509,23 +472,6 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
    if (PyThread_tss_is_created(&runtime->trashTSSkey)) {
        PyThread_tss_delete(&runtime->trashTSSkey);
    }
-
-    /* Force the allocator used by _PyRuntimeState_Init(). */
-    PyMemAllocatorEx old_alloc;
-    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-#define FREE_LOCK(LOCK) \
-    if (LOCK != NULL) { \
-        PyThread_free_lock(LOCK); \
-        LOCK = NULL; \
-    }
-
-    PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
-    for (int i = 0; i < NUMLOCKS; i++) {
-        FREE_LOCK(*lockptrs[i]);
-    }
-
-#undef FREE_LOCK
-    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
 }

 #ifdef HAVE_FORK
@ -537,28 +483,19 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
    // This was initially set in _PyRuntimeState_Init().
    runtime->main_thread = PyThread_get_thread_ident();

-    /* Force default allocator, since _PyRuntimeState_Fini() must
-       use the same allocator than this function. */
-    PyMemAllocatorEx old_alloc;
-    _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
-    PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
-    int reinit_err = 0;
-    for (int i = 0; i < NUMLOCKS; i++) {
-        reinit_err += _PyThread_at_fork_reinit(lockptrs[i]);
-    }
-
-    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
    // Clears the parking lot. Any waiting threads are dead. This must be
    // called before releasing any locks that use the parking lot.
    _PyParkingLot_AfterFork();

+    // Re-initialize global locks
+    PyMutex *locks[] = LOCKS_INIT(runtime);
+    for (size_t i = 0; i < Py_ARRAY_LENGTH(locks); i++) {
+        _PyMutex_at_fork_reinit(locks[i]);
+    }
+
    /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does
     * not force the default allocator. */
-    reinit_err += _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex);
-
-    if (reinit_err < 0) {
+    if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) {
        return _PyStatus_ERR("Failed to reinitialize runtime locks");
    }

@ -594,24 +531,6 @@ _PyInterpreterState_Enable(_PyRuntimeState *runtime)
 {
    struct pyinterpreters *interpreters = &runtime->interpreters;
    interpreters->next_id = 0;
-
-    /* Py_Finalize() calls _PyRuntimeState_Fini() which clears the mutex.
-       Create a new mutex if needed. */
-    if (interpreters->mutex == NULL) {
-        /* Force default allocator, since _PyRuntimeState_Fini() must
-           use the same allocator than this function. */
-        PyMemAllocatorEx old_alloc;
-        _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
-        interpreters->mutex = PyThread_allocate_lock();
-
-        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
-        if (interpreters->mutex == NULL) {
-            return _PyStatus_ERR("Can't initialize threads for interpreter");
-        }
-    }
-
    return _PyStatus_OK();
 }

@ -654,8 +573,7 @@ free_interpreter(PyInterpreterState *interp)
 static PyStatus
 init_interpreter(PyInterpreterState *interp,
                 _PyRuntimeState *runtime, int64_t id,
-                 PyInterpreterState *next,
-                 PyThread_type_lock pending_lock)
+                 PyInterpreterState *next)
 {
    if (interp->_initialized) {
        return _PyStatus_ERR("interpreter already initialized");
@ -684,7 +602,7 @@ init_interpreter(PyInterpreterState *interp,
        return status;
    }

-    _PyEval_InitState(interp, pending_lock);
+    _PyEval_InitState(interp);
    _PyGC_InitState(&interp->gc);
    PyConfig_InitPythonConfig(&interp->config);
    _PyType_InitCache(interp);
@ -730,11 +648,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
        }
    }

-    PyThread_type_lock pending_lock = PyThread_allocate_lock();
-    if (pending_lock == NULL) {
-        return _PyStatus_NO_MEMORY();
-    }
-
    /* We completely serialize creation of multiple interpreters, since
       it simplifies things here and blocking concurrent calls isn't a problem.
       Regardless, we must fully block subinterpreter creation until
@ -781,11 +694,10 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
    interpreters->head = interp;

    status = init_interpreter(interp, runtime,
-                              id, old_head, pending_lock);
+                              id, old_head);
    if (_PyStatus_EXCEPTION(status)) {
        goto error;
    }
-    pending_lock = NULL;

    HEAD_UNLOCK(runtime);

@ -796,9 +708,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
 error:
    HEAD_UNLOCK(runtime);

-    if (pending_lock != NULL) {
-        PyThread_free_lock(pending_lock);
-    }
    if (interp != NULL) {
        free_interpreter(interp);
    }
@ -1003,8 +912,6 @@ PyInterpreterState_Delete(PyInterpreterState *interp)

    zapthreads(interp);

-    _PyEval_FiniState(&interp->ceval);
-
    // XXX These two calls should be done at the end of clear_interpreter(),
    // but currently some objects get decref'ed after that.
 #ifdef Py_REF_DEBUG
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@ -451,15 +451,9 @@ PySys_AddAuditHook(Py_AuditHookFunction hook, void *userData)
    e->hookCFunction = (Py_AuditHookFunction)hook;
    e->userData = userData;

-    if (runtime->audit_hooks.mutex == NULL) {
-        /* The runtime must not be initialized yet. */
-        add_audit_hook_entry_unlocked(runtime, e);
-    }
-    else {
-        PyThread_acquire_lock(runtime->audit_hooks.mutex, WAIT_LOCK);
-        add_audit_hook_entry_unlocked(runtime, e);
-        PyThread_release_lock(runtime->audit_hooks.mutex);
-    }
+    PyMutex_Lock(&runtime->audit_hooks.mutex);
+    add_audit_hook_entry_unlocked(runtime, e);
+    PyMutex_Unlock(&runtime->audit_hooks.mutex);

    return 0;
 }