gh-124878: Fix race conditions during interpreter finalization (#130649)

The PyThreadState struct gains a reference count field to avoid
issues with PyThreadState being a dangling pointer to freed memory.
The refcount starts with a value of two: one reference is owned by the
interpreter's linked list of thread states and one reference is owned by
the OS thread. The reference count is decremented when the thread state
is removed from the interpreter's linked list and before the OS thread
calls `PyThread_hang_thread()`. The thread that decrements it to zero
frees the `PyThreadState` memory.

The `holds_gil` field is moved out of the `_status` bit field, to avoid
a data race where one thread calls `PyThreadState_Clear()`, modifying the
`_status` bit field while the OS thread reads `holds_gil` when
attempting to acquire the GIL.

The `PyThreadState.state` field now has `_Py_THREAD_SHUTTING_DOWN` as a
possible value. This corresponds to the `_PyThreadState_MustExit()`
check. This avoids race conditions in the free threading build when
checking `_PyThreadState_MustExit()`.
This commit is contained in:
Sam Gross 2025-03-06 10:38:34 -05:00 committed by GitHub
parent c6dd2348ca
commit 052cb717f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 109 additions and 81 deletions

View file

@ -27,6 +27,10 @@ extern "C" {
// "suspended" state. Only the thread performing a stop-the-world pause may
// transition a thread from the "suspended" state back to the "detached" state.
//
// The "shutting down" state is used when the interpreter is being finalized.
// Threads in this state can't do anything other than block the OS thread.
// (See _PyThreadState_HangThread).
//
// State transition diagram:
//
// (bound thread) (stop-the-world thread)
@ -37,9 +41,10 @@ extern "C" {
//
// The (bound thread) and (stop-the-world thread) labels indicate which thread
// is allowed to perform the transition.
#define _Py_THREAD_DETACHED 0
#define _Py_THREAD_ATTACHED 1
#define _Py_THREAD_SUSPENDED 2
#define _Py_THREAD_DETACHED 0
#define _Py_THREAD_ATTACHED 1
#define _Py_THREAD_SUSPENDED 2
// Used while the interpreter is being finalized; a thread in this state
// can't do anything other than block its OS thread.
#define _Py_THREAD_SHUTTING_DOWN 3
/* Check if the current thread is the main thread.
@ -118,7 +123,8 @@ extern _Py_thread_local PyThreadState *_Py_tss_tstate;
extern int _PyThreadState_CheckConsistency(PyThreadState *tstate);
#endif
// Finalization check for a thread state; the _Py_THREAD_SHUTTING_DOWN
// thread state corresponds to this check.
// NOTE(review): the exact return convention is not visible here -- confirm
// against the definition.
int _PyThreadState_MustExit(PyThreadState *tstate);
extern int _PyThreadState_MustExit(PyThreadState *tstate);
// Blocks the OS thread. Threads marked as "shutting down" may no longer
// attach to the interpreter and block via this function instead.
extern void _PyThreadState_HangThread(PyThreadState *tstate);
// Export for most shared extensions, used via _PyThreadState_GET() static
// inline function.
@ -169,6 +175,11 @@ extern void _PyThreadState_Detach(PyThreadState *tstate);
// to the "detached" state.
extern void _PyThreadState_Suspend(PyThreadState *tstate);
// Mark the thread state as "shutting down". This is used during interpreter
// and runtime finalization. The thread may no longer attach to the
// interpreter and will instead block via _PyThreadState_HangThread().
extern void _PyThreadState_SetShuttingDown(PyThreadState *tstate);
// Perform a stop-the-world pause for all threads in all interpreters.
//
// Threads in the "attached" state are paused and transitioned to the "GC"
@ -238,7 +249,7 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_NewBound(
PyInterpreterState *interp,
int whence);
extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate);
extern void _PyThreadState_DeleteList(PyThreadState *list);
extern void _PyThreadState_DeleteList(PyThreadState *list, int is_after_fork);
extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate);
// Export for '_testinternalcapi' shared extension

View file

@ -171,6 +171,8 @@ extern PyTypeObject _PyExc_MemoryError;
#define _PyThreadStateImpl_INIT \
{ \
.base = _PyThreadState_INIT, \
/* The thread and the interpreter's linked list hold a reference */ \
.refcount = 2, \
}
#define _PyThreadState_INIT \

View file

@ -21,6 +21,10 @@ typedef struct _PyThreadStateImpl {
// semi-public fields are in PyThreadState.
PyThreadState base;
// The reference count field is used to synchronize deallocation of the
// thread state during runtime finalization.
Py_ssize_t refcount;
// These are addresses, but we need to convert to ints to avoid UB.
uintptr_t c_stack_top;
uintptr_t c_stack_soft_limit;