gh-87135: Hang non-main threads that attempt to acquire the GIL during finalization (GH-105805)

Instead of surprise crashes and memory corruption, we now hang threads that attempt to re-enter the Python interpreter after Python runtime finalization has started. These are typically daemon threads (our long standing mis-feature) but could also be threads spawned by extension modules that then try to call into Python. This marks the `PyThread_exit_thread` public C API as deprecated as there is no plausible safe way to accomplish that on any supported platform in the face of things like C++ code with finalizers anywhere on a thread's stack. Doing this was the least bad option.

Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Jeremy Maitin-Shepard 2024-10-02 09:17:49 -07:00 committed by GitHub
parent 113b2d7583
commit 8cc5aa47ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 247 additions and 29 deletions

View file

@ -7,6 +7,7 @@
#include "pycore_pylifecycle.h" // _PyErr_Print()
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
#include "pycore_pystats.h" // _Py_PrintSpecializationStats()
#include "pycore_pythread.h" // PyThread_hang_thread()
/*
Notes about the implementation:
@ -277,10 +278,9 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release)
/* Take the GIL.
The function saves errno at entry and restores its value at exit.
It may hang rather than return if the interpreter has been finalized.
tstate must be non-NULL.
Returns 1 if the GIL was acquired, or 0 if not. */
tstate must be non-NULL. */
static void
take_gil(PyThreadState *tstate)
{
@ -293,12 +293,18 @@ take_gil(PyThreadState *tstate)
if (_PyThreadState_MustExit(tstate)) {
/* bpo-39877: If Py_Finalize() has been called and tstate is not the
thread which called Py_Finalize(), exit immediately the thread.
thread which called Py_Finalize(), this thread cannot continue.
This code path can be reached by a daemon thread after Py_Finalize()
completes. In this case, tstate is a dangling pointer: points to
PyThreadState freed memory. */
PyThread_exit_thread();
PyThreadState freed memory.
This used to call a *thread_exit API, but that was not safe as it
lacks stack unwinding and local variable destruction important to
C++. gh-87135: The best that can be done is to hang the thread as
the public APIs calling this have no error reporting mechanism (!).
*/
PyThread_hang_thread();
}
assert(_PyThreadState_CheckConsistency(tstate));
@ -342,7 +348,9 @@ take_gil(PyThreadState *tstate)
if (drop_requested) {
_Py_unset_eval_breaker_bit(holder_tstate, _PY_GIL_DROP_REQUEST_BIT);
}
PyThread_exit_thread();
// gh-87135: hang the thread as *thread_exit() is not a safe
// API. It lacks stack unwind and local variable destruction.
PyThread_hang_thread();
}
assert(_PyThreadState_CheckConsistency(tstate));
@ -383,7 +391,7 @@ take_gil(PyThreadState *tstate)
if (_PyThreadState_MustExit(tstate)) {
/* bpo-36475: If Py_Finalize() has been called and tstate is not
the thread which called Py_Finalize(), exit immediately the
the thread which called Py_Finalize(), gh-87135: hang the
thread.
This code path can be reached by a daemon thread which was waiting
@ -393,7 +401,7 @@ take_gil(PyThreadState *tstate)
/* tstate could be a dangling pointer, so don't pass it to
drop_gil(). */
drop_gil(interp, NULL, 1);
PyThread_exit_thread();
PyThread_hang_thread();
}
assert(_PyThreadState_CheckConsistency(tstate));

View file

@ -2020,7 +2020,7 @@ _Py_Finalize(_PyRuntimeState *runtime)
/* Ensure that remaining threads are detached */
_PyEval_StopTheWorldAll(runtime);
/* Remaining daemon threads will automatically exit
/* Remaining daemon threads will be trapped in PyThread_hang_thread
when they attempt to take the GIL (ex: PyEval_RestoreThread()). */
_PyInterpreterState_SetFinalizing(tstate->interp, tstate);
_PyRuntimeState_SetFinalizing(runtime, tstate);

View file

@ -291,6 +291,14 @@ PyThread_exit_thread(void)
_endthreadex(0);
}
void _Py_NO_RETURN
PyThread_hang_thread(void)
{
while (1) {
SleepEx(INFINITE, TRUE);
}
}
/*
* Lock support. It has to be implemented as semaphores.
* I [Dag] tried to implement it with mutex but I could find a way to

View file

@ -16,6 +16,7 @@
#undef destructor
#endif
#include <signal.h>
#include <unistd.h> /* pause(), also getthrid() on OpenBSD */
#if defined(__linux__)
# include <sys/syscall.h> /* syscall(SYS_gettid) */
@ -23,8 +24,6 @@
# include <pthread_np.h> /* pthread_getthreadid_np() */
#elif defined(__FreeBSD_kernel__)
# include <sys/syscall.h> /* syscall(SYS_thr_self) */
#elif defined(__OpenBSD__)
# include <unistd.h> /* getthrid() */
#elif defined(_AIX)
# include <sys/thread.h> /* thread_self() */
#elif defined(__NetBSD__)
@ -419,6 +418,18 @@ PyThread_exit_thread(void)
#endif
}
void _Py_NO_RETURN
PyThread_hang_thread(void)
{
while (1) {
#if defined(__wasi__)
sleep(9999999); // WASI doesn't have pause() ?!
#else
pause();
#endif
}
}
#ifdef USE_SEMAPHORES
/*