gh-114271: Make _thread.ThreadHandle thread-safe in free-threaded builds (GH-115190)

Make `_thread.ThreadHandle` thread-safe in free-threaded builds

We protect the mutable state of `ThreadHandle` using a `_PyOnceFlag`.
Concurrent operations (i.e. `join` or `detach`) on `ThreadHandle` block
until it is their turn to execute or an earlier operation succeeds.
Once an operation has been applied successfully all future operations
complete immediately.

The `join()` method is now idempotent. It may be called multiple times
but the underlying OS thread will only be joined once. After `join()`
succeeds, any future calls to `join()` will succeed immediately.

The internal thread handle `detach()` method has been removed.
This commit is contained in:
mpage 2024-03-01 13:43:12 -08:00 committed by GitHub
parent 5e0c7bc1d3
commit 9e88173d36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 230 additions and 106 deletions

View file

@ -1,9 +1,9 @@
/* Thread module */
/* Interface to Sjoerd's portable C thread library */
#include "Python.h"
#include "pycore_interp.h" // _PyInterpreterState.threads.count
#include "pycore_lock.h"
#include "pycore_moduleobject.h" // _PyModule_GetState()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_pylifecycle.h"
@ -44,24 +44,76 @@ get_thread_state(PyObject *module)
// _ThreadHandle type
// Handles transition from RUNNING to one of JOINED, DETACHED, or INVALID (post
// fork).
typedef enum {
THREAD_HANDLE_RUNNING = 1,
THREAD_HANDLE_JOINED = 2,
THREAD_HANDLE_DETACHED = 3,
THREAD_HANDLE_INVALID = 4,
} ThreadHandleState;
// A handle around an OS thread.
//
// The OS thread is either joined or detached after the handle is destroyed.
//
// Joining the handle is idempotent; the underlying OS thread is joined or
// detached only once. Concurrent join operations are serialized until it is
// their turn to execute or an earlier operation completes successfully. Once a
// join has completed successfully all future joins complete immediately.
typedef struct {
PyObject_HEAD
struct llist_node node; // linked list node (see _pythread_runtime_state)
// The `ident` and `handle` fields are immutable once the object is visible
// to threads other than its creator, thus they do not need to be accessed
// atomically.
PyThread_ident_t ident;
PyThread_handle_t handle;
char joinable;
// Holds a value from the `ThreadHandleState` enum.
int state;
// Set immediately before `thread_run` returns to indicate that the OS
// thread is about to exit. This is used to avoid false positives when
// detecting self-join attempts. See the comment in `ThreadHandle_join()`
// for a more detailed explanation.
_PyEventRc *thread_is_exiting;
// Serializes calls to `join`.
_PyOnceFlag once;
} ThreadHandleObject;
static inline int
get_thread_handle_state(ThreadHandleObject *handle)
{
return _Py_atomic_load_int(&handle->state);
}
static inline void
set_thread_handle_state(ThreadHandleObject *handle, ThreadHandleState state)
{
_Py_atomic_store_int(&handle->state, state);
}
static ThreadHandleObject*
new_thread_handle(thread_module_state* state)
{
_PyEventRc *event = _PyEventRc_New();
if (event == NULL) {
PyErr_NoMemory();
return NULL;
}
ThreadHandleObject* self = PyObject_New(ThreadHandleObject, state->thread_handle_type);
if (self == NULL) {
_PyEventRc_Decref(event);
return NULL;
}
self->ident = 0;
self->handle = 0;
self->joinable = 0;
self->thread_is_exiting = event;
self->once = (_PyOnceFlag){0};
self->state = THREAD_HANDLE_INVALID;
HEAD_LOCK(&_PyRuntime);
llist_insert_tail(&_PyRuntime.threads.handles, &self->node);
@ -82,13 +134,21 @@ ThreadHandle_dealloc(ThreadHandleObject *self)
}
HEAD_UNLOCK(&_PyRuntime);
if (self->joinable) {
int ret = PyThread_detach_thread(self->handle);
if (ret) {
// It's safe to access state non-atomically:
// 1. This is the destructor; nothing else holds a reference.
// 2. The refcount going to zero is a "synchronizes-with" event;
// all changes from other threads are visible.
if (self->state == THREAD_HANDLE_RUNNING) {
// This is typically short so no need to release the GIL
if (PyThread_detach_thread(self->handle)) {
PyErr_SetString(ThreadError, "Failed detaching thread");
PyErr_WriteUnraisable(tp);
}
else {
self->state = THREAD_HANDLE_DETACHED;
}
}
_PyEventRc_Decref(self->thread_is_exiting);
PyObject_Free(self);
Py_DECREF(tp);
}
@ -109,8 +169,9 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state)
continue;
}
// Disallow calls to detach() and join() as they could crash.
hobj->joinable = 0;
// Disallow calls to join() as they could crash. We are the only
// thread; it's safe to set this without an atomic.
hobj->state = THREAD_HANDLE_INVALID;
llist_remove(node);
}
}
@ -128,48 +189,54 @@ ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored)
return PyLong_FromUnsignedLongLong(self->ident);
}
static PyObject *
ThreadHandle_detach(ThreadHandleObject *self, void* ignored)
static int
join_thread(ThreadHandleObject *handle)
{
if (!self->joinable) {
PyErr_SetString(PyExc_ValueError,
"the thread is not joinable and thus cannot be detached");
return NULL;
assert(get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING);
int err;
Py_BEGIN_ALLOW_THREADS
err = PyThread_join_thread(handle->handle);
Py_END_ALLOW_THREADS
if (err) {
PyErr_SetString(ThreadError, "Failed joining thread");
return -1;
}
self->joinable = 0;
// This is typically short so no need to release the GIL
int ret = PyThread_detach_thread(self->handle);
if (ret) {
PyErr_SetString(ThreadError, "Failed detaching thread");
return NULL;
}
Py_RETURN_NONE;
set_thread_handle_state(handle, THREAD_HANDLE_JOINED);
return 0;
}
static PyObject *
ThreadHandle_join(ThreadHandleObject *self, void* ignored)
{
if (!self->joinable) {
PyErr_SetString(PyExc_ValueError, "the thread is not joinable");
if (get_thread_handle_state(self) == THREAD_HANDLE_INVALID) {
PyErr_SetString(PyExc_ValueError,
"the handle is invalid and thus cannot be joined");
return NULL;
}
if (self->ident == PyThread_get_thread_ident_ex()) {
// We want to perform this check outside of the `_PyOnceFlag` to prevent
// deadlock in the scenario where another thread joins us and we then
// attempt to join ourselves. However, it's not safe to check thread
// identity once the handle's os thread has finished. We may end up reusing
// the identity stored in the handle and erroneously think we are
// attempting to join ourselves.
//
// To work around this, we set `thread_is_exiting` immediately before
// `thread_run` returns. We can be sure that we are not attempting to join
// ourselves if the handle's thread is about to exit.
if (!_PyEvent_IsSet(&self->thread_is_exiting->event) &&
self->ident == PyThread_get_thread_ident_ex()) {
// PyThread_join_thread() would deadlock or error out.
PyErr_SetString(ThreadError, "Cannot join current thread");
return NULL;
}
// Before actually joining, we must first mark the thread as non-joinable,
// as joining several times simultaneously or sequentially is undefined behavior.
self->joinable = 0;
int ret;
Py_BEGIN_ALLOW_THREADS
ret = PyThread_join_thread(self->handle);
Py_END_ALLOW_THREADS
if (ret) {
PyErr_SetString(ThreadError, "Failed joining thread");
if (_PyOnceFlag_CallOnce(&self->once, (_Py_once_fn_t *)join_thread,
self) == -1) {
return NULL;
}
assert(get_thread_handle_state(self) == THREAD_HANDLE_JOINED);
Py_RETURN_NONE;
}
@ -180,7 +247,6 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {
static PyMethodDef ThreadHandle_methods[] =
{
{"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS},
{"join", (PyCFunction)ThreadHandle_join, METH_NOARGS},
{0, 0}
};
@ -1210,11 +1276,15 @@ _localdummy_destroyed(PyObject *localweakref, PyObject *dummyweakref)
/* Module functions */
// bootstate is used to "bootstrap" new threads. Any arguments needed by
// `thread_run()`, which can only take a single argument due to platform
// limitations, are contained in bootstate.
struct bootstate {
PyThreadState *tstate;
PyObject *func;
PyObject *args;
PyObject *kwargs;
_PyEventRc *thread_is_exiting;
};
@ -1226,6 +1296,9 @@ thread_bootstate_free(struct bootstate *boot, int decref)
Py_DECREF(boot->args);
Py_XDECREF(boot->kwargs);
}
if (boot->thread_is_exiting != NULL) {
_PyEventRc_Decref(boot->thread_is_exiting);
}
PyMem_RawFree(boot);
}
@ -1236,6 +1309,10 @@ thread_run(void *boot_raw)
struct bootstate *boot = (struct bootstate *) boot_raw;
PyThreadState *tstate = boot->tstate;
// `thread_is_exiting` needs to be set after bootstate has been freed
_PyEventRc *thread_is_exiting = boot->thread_is_exiting;
boot->thread_is_exiting = NULL;
// gh-108987: If _thread.start_new_thread() is called before or while
// Python is being finalized, thread_run() can called *after*.
// _PyRuntimeState_SetFinalizing() is called. At this point, all Python
@ -1280,6 +1357,11 @@ thread_run(void *boot_raw)
_PyThreadState_DeleteCurrent(tstate);
exit:
if (thread_is_exiting != NULL) {
_PyEvent_Notify(&thread_is_exiting->event);
_PyEventRc_Decref(thread_is_exiting);
}
// bpo-44434: Don't call explicitly PyThread_exit_thread(). On Linux with
// the glibc, pthread_exit() can abort the whole process if dlopen() fails
// to open the libgcc_s.so library (ex: EMFILE error).
@ -1308,7 +1390,8 @@ static int
do_start_new_thread(thread_module_state* state,
PyObject *func, PyObject* args, PyObject* kwargs,
int joinable,
PyThread_ident_t* ident, PyThread_handle_t* handle)
PyThread_ident_t* ident, PyThread_handle_t* handle,
_PyEventRc *thread_is_exiting)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_PyInterpreterState_HasFeature(interp, Py_RTFLAGS_THREADS)) {
@ -1341,6 +1424,10 @@ do_start_new_thread(thread_module_state* state,
boot->func = Py_NewRef(func);
boot->args = Py_NewRef(args);
boot->kwargs = Py_XNewRef(kwargs);
boot->thread_is_exiting = thread_is_exiting;
if (thread_is_exiting != NULL) {
_PyEventRc_Incref(thread_is_exiting);
}
int err;
if (joinable) {
@ -1392,7 +1479,7 @@ thread_PyThread_start_new_thread(PyObject *module, PyObject *fargs)
PyThread_ident_t ident = 0;
PyThread_handle_t handle;
if (do_start_new_thread(state, func, args, kwargs, /*joinable=*/ 0,
&ident, &handle)) {
&ident, &handle, NULL)) {
return NULL;
}
return PyLong_FromUnsignedLongLong(ident);
@ -1436,13 +1523,13 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *func)
return NULL;
}
if (do_start_new_thread(state, func, args, /*kwargs=*/ NULL, /*joinable=*/ 1,
&hobj->ident, &hobj->handle)) {
&hobj->ident, &hobj->handle, hobj->thread_is_exiting)) {
Py_DECREF(args);
Py_DECREF(hobj);
return NULL;
}
set_thread_handle_state(hobj, THREAD_HANDLE_RUNNING);
Py_DECREF(args);
hobj->joinable = 1;
return (PyObject*) hobj;
}