GH-110829: Ensure Thread.join() joins the OS thread (#110848)

Joining a thread now ensures the underlying OS thread has exited. This is required for safer fork() in multi-threaded processes.

---------

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
This commit is contained in:
Antoine Pitrou 2023-11-04 14:59:24 +01:00 committed by GitHub
parent a28a3967ab
commit 0e9c364f4a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 676 additions and 103 deletions

View file

@ -22,12 +22,13 @@
// Forward declarations
static struct PyModuleDef thread_module;
// Module state
typedef struct {
PyTypeObject *excepthook_type;
PyTypeObject *lock_type;
PyTypeObject *local_type;
PyTypeObject *local_dummy_type;
PyTypeObject *thread_handle_type;
} thread_module_state;
static inline thread_module_state*
@ -38,6 +39,145 @@ get_thread_state(PyObject *module)
return (thread_module_state *)state;
}
// _ThreadHandle type
typedef struct {
PyObject_HEAD
PyThread_ident_t ident;
PyThread_handle_t handle;
char joinable;
} ThreadHandleObject;
static ThreadHandleObject*
new_thread_handle(thread_module_state* state)
{
ThreadHandleObject* self = PyObject_New(ThreadHandleObject, state->thread_handle_type);
if (self == NULL) {
return NULL;
}
self->ident = 0;
self->handle = 0;
self->joinable = 0;
return self;
}
static void
ThreadHandle_dealloc(ThreadHandleObject *self)
{
PyObject *tp = (PyObject *) Py_TYPE(self);
if (self->joinable) {
int ret = PyThread_detach_thread(self->handle);
if (ret) {
PyErr_SetString(ThreadError, "Failed detaching thread");
PyErr_WriteUnraisable(tp);
}
}
PyObject_Free(self);
Py_DECREF(tp);
}
static PyObject *
ThreadHandle_repr(ThreadHandleObject *self)
{
return PyUnicode_FromFormat("<%s object: ident=%" PY_FORMAT_THREAD_IDENT_T ">",
Py_TYPE(self)->tp_name, self->ident);
}
static PyObject *
ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored)
{
return PyLong_FromUnsignedLongLong(self->ident);
}
static PyObject *
ThreadHandle_after_fork_alive(ThreadHandleObject *self, void* ignored)
{
PyThread_update_thread_after_fork(&self->ident, &self->handle);
Py_RETURN_NONE;
}
static PyObject *
ThreadHandle_after_fork_dead(ThreadHandleObject *self, void* ignored)
{
// Disallow calls to detach() and join() as they could crash.
self->joinable = 0;
Py_RETURN_NONE;
}
static PyObject *
ThreadHandle_detach(ThreadHandleObject *self, void* ignored)
{
if (!self->joinable) {
PyErr_SetString(PyExc_ValueError,
"the thread is not joinable and thus cannot be detached");
return NULL;
}
self->joinable = 0;
// This is typically short so no need to release the GIL
int ret = PyThread_detach_thread(self->handle);
if (ret) {
PyErr_SetString(ThreadError, "Failed detaching thread");
return NULL;
}
Py_RETURN_NONE;
}
static PyObject *
ThreadHandle_join(ThreadHandleObject *self, void* ignored)
{
if (!self->joinable) {
PyErr_SetString(PyExc_ValueError, "the thread is not joinable");
return NULL;
}
if (self->ident == PyThread_get_thread_ident_ex()) {
// PyThread_join_thread() would deadlock or error out.
PyErr_SetString(ThreadError, "Cannot join current thread");
return NULL;
}
// Before actually joining, we must first mark the thread as non-joinable,
// as joining several times simultaneously or sequentially is undefined behavior.
self->joinable = 0;
int ret;
Py_BEGIN_ALLOW_THREADS
ret = PyThread_join_thread(self->handle);
Py_END_ALLOW_THREADS
if (ret) {
PyErr_SetString(ThreadError, "Failed joining thread");
return NULL;
}
Py_RETURN_NONE;
}
static PyGetSetDef ThreadHandle_getsetlist[] = {
{"ident", (getter)ThreadHandle_get_ident, NULL, NULL},
{0},
};
static PyMethodDef ThreadHandle_methods[] =
{
{"after_fork_alive", (PyCFunction)ThreadHandle_after_fork_alive, METH_NOARGS},
{"after_fork_dead", (PyCFunction)ThreadHandle_after_fork_dead, METH_NOARGS},
{"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS},
{"join", (PyCFunction)ThreadHandle_join, METH_NOARGS},
{0, 0}
};
static PyType_Slot ThreadHandle_Type_slots[] = {
{Py_tp_dealloc, (destructor)ThreadHandle_dealloc},
{Py_tp_repr, (reprfunc)ThreadHandle_repr},
{Py_tp_getset, ThreadHandle_getsetlist},
{Py_tp_methods, ThreadHandle_methods},
{0, 0}
};
static PyType_Spec ThreadHandle_Type_spec = {
"_thread._ThreadHandle",
sizeof(ThreadHandleObject),
0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
ThreadHandle_Type_slots,
};
/* Lock objects */
@ -274,7 +414,7 @@ static PyType_Spec lock_type_spec = {
typedef struct {
PyObject_HEAD
PyThread_type_lock rlock_lock;
unsigned long rlock_owner;
PyThread_ident_t rlock_owner;
unsigned long rlock_count;
PyObject *in_weakreflist;
} rlockobject;
@ -311,13 +451,13 @@ static PyObject *
rlock_acquire(rlockobject *self, PyObject *args, PyObject *kwds)
{
_PyTime_t timeout;
unsigned long tid;
PyThread_ident_t tid;
PyLockStatus r = PY_LOCK_ACQUIRED;
if (lock_acquire_parse_args(args, kwds, &timeout) < 0)
return NULL;
tid = PyThread_get_thread_ident();
tid = PyThread_get_thread_ident_ex();
if (self->rlock_count > 0 && tid == self->rlock_owner) {
unsigned long count = self->rlock_count + 1;
if (count <= self->rlock_count) {
@ -360,7 +500,7 @@ the lock is taken and its internal counter initialized to 1.");
static PyObject *
rlock_release(rlockobject *self, PyObject *Py_UNUSED(ignored))
{
unsigned long tid = PyThread_get_thread_ident();
PyThread_ident_t tid = PyThread_get_thread_ident_ex();
if (self->rlock_count == 0 || self->rlock_owner != tid) {
PyErr_SetString(PyExc_RuntimeError,
@ -389,11 +529,12 @@ to be available for other threads.");
static PyObject *
rlock_acquire_restore(rlockobject *self, PyObject *args)
{
unsigned long owner;
PyThread_ident_t owner;
unsigned long count;
int r = 1;
if (!PyArg_ParseTuple(args, "(kk):_acquire_restore", &count, &owner))
if (!PyArg_ParseTuple(args, "(k" Py_PARSE_THREAD_IDENT_T "):_acquire_restore",
&count, &owner))
return NULL;
if (!PyThread_acquire_lock(self->rlock_lock, 0)) {
@ -419,7 +560,7 @@ For internal use by `threading.Condition`.");
static PyObject *
rlock_release_save(rlockobject *self, PyObject *Py_UNUSED(ignored))
{
unsigned long owner;
PyThread_ident_t owner;
unsigned long count;
if (self->rlock_count == 0) {
@ -433,7 +574,7 @@ rlock_release_save(rlockobject *self, PyObject *Py_UNUSED(ignored))
self->rlock_count = 0;
self->rlock_owner = 0;
PyThread_release_lock(self->rlock_lock);
return Py_BuildValue("kk", count, owner);
return Py_BuildValue("k" Py_PARSE_THREAD_IDENT_T, count, owner);
}
PyDoc_STRVAR(rlock_release_save_doc,
@ -444,7 +585,7 @@ For internal use by `threading.Condition`.");
static PyObject *
rlock_recursion_count(rlockobject *self, PyObject *Py_UNUSED(ignored))
{
unsigned long tid = PyThread_get_thread_ident();
PyThread_ident_t tid = PyThread_get_thread_ident_ex();
return PyLong_FromUnsignedLong(
self->rlock_owner == tid ? self->rlock_count : 0UL);
}
@ -457,7 +598,7 @@ For internal use by reentrancy checks.");
static PyObject *
rlock_is_owned(rlockobject *self, PyObject *Py_UNUSED(ignored))
{
unsigned long tid = PyThread_get_thread_ident();
PyThread_ident_t tid = PyThread_get_thread_ident_ex();
if (self->rlock_count > 0 && self->rlock_owner == tid) {
Py_RETURN_TRUE;
@ -493,7 +634,8 @@ rlock_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static PyObject *
rlock_repr(rlockobject *self)
{
return PyUnicode_FromFormat("<%s %s object owner=%ld count=%lu at %p>",
return PyUnicode_FromFormat(
"<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%lu at %p>",
self->rlock_count ? "locked" : "unlocked",
Py_TYPE(self)->tp_name, self->rlock_owner,
self->rlock_count, self);
@ -1109,10 +1251,66 @@ PyDoc_STRVAR(daemon_threads_allowed_doc,
Return True if daemon threads are allowed in the current interpreter,\n\
and False otherwise.\n");
static int
do_start_new_thread(thread_module_state* state,
PyObject *func, PyObject* args, PyObject* kwargs,
int joinable,
PyThread_ident_t* ident, PyThread_handle_t* handle)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_PyInterpreterState_HasFeature(interp, Py_RTFLAGS_THREADS)) {
PyErr_SetString(PyExc_RuntimeError,
"thread is not supported for isolated subinterpreters");
return -1;
}
if (interp->finalizing) {
PyErr_SetString(PyExc_RuntimeError,
"can't create new thread at interpreter shutdown");
return -1;
}
// gh-109795: Use PyMem_RawMalloc() instead of PyMem_Malloc(),
// because it should be possible to call thread_bootstate_free()
// without holding the GIL.
struct bootstate *boot = PyMem_RawMalloc(sizeof(struct bootstate));
if (boot == NULL) {
PyErr_NoMemory();
return -1;
}
boot->tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_THREADING);
if (boot->tstate == NULL) {
PyMem_RawFree(boot);
if (!PyErr_Occurred()) {
PyErr_NoMemory();
}
return -1;
}
boot->func = Py_NewRef(func);
boot->args = Py_NewRef(args);
boot->kwargs = Py_XNewRef(kwargs);
int err;
if (joinable) {
err = PyThread_start_joinable_thread(thread_run, (void*) boot, ident, handle);
} else {
*handle = 0;
*ident = PyThread_start_new_thread(thread_run, (void*) boot);
err = (*ident == PYTHREAD_INVALID_THREAD_ID);
}
if (err) {
PyErr_SetString(ThreadError, "can't start new thread");
PyThreadState_Clear(boot->tstate);
thread_bootstate_free(boot, 1);
return -1;
}
return 0;
}
static PyObject *
thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs)
thread_PyThread_start_new_thread(PyObject *module, PyObject *fargs)
{
PyObject *func, *args, *kwargs = NULL;
thread_module_state *state = get_thread_state(module);
if (!PyArg_UnpackTuple(fargs, "start_new_thread", 2, 3,
&func, &args, &kwargs))
@ -1138,57 +1336,73 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs)
return NULL;
}
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_PyInterpreterState_HasFeature(interp, Py_RTFLAGS_THREADS)) {
PyErr_SetString(PyExc_RuntimeError,
"thread is not supported for isolated subinterpreters");
PyThread_ident_t ident = 0;
PyThread_handle_t handle;
if (do_start_new_thread(state, func, args, kwargs, /*joinable=*/ 0,
&ident, &handle)) {
return NULL;
}
if (interp->finalizing) {
PyErr_SetString(PyExc_RuntimeError,
"can't create new thread at interpreter shutdown");
return NULL;
}
// gh-109795: Use PyMem_RawMalloc() instead of PyMem_Malloc(),
// because it should be possible to call thread_bootstate_free()
// without holding the GIL.
struct bootstate *boot = PyMem_RawMalloc(sizeof(struct bootstate));
if (boot == NULL) {
return PyErr_NoMemory();
}
boot->tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_THREADING);
if (boot->tstate == NULL) {
PyMem_RawFree(boot);
if (!PyErr_Occurred()) {
return PyErr_NoMemory();
}
return NULL;
}
boot->func = Py_NewRef(func);
boot->args = Py_NewRef(args);
boot->kwargs = Py_XNewRef(kwargs);
unsigned long ident = PyThread_start_new_thread(thread_run, (void*) boot);
if (ident == PYTHREAD_INVALID_THREAD_ID) {
PyErr_SetString(ThreadError, "can't start new thread");
PyThreadState_Clear(boot->tstate);
thread_bootstate_free(boot, 1);
return NULL;
}
return PyLong_FromUnsignedLong(ident);
return PyLong_FromUnsignedLongLong(ident);
}
PyDoc_STRVAR(start_new_doc,
"start_new_thread(function, args[, kwargs])\n\
(start_new() is an obsolete synonym)\n\
\n\
Start a new thread and return its identifier. The thread will call the\n\
function with positional arguments from the tuple args and keyword arguments\n\
taken from the optional dictionary kwargs. The thread exits when the\n\
function returns; the return value is ignored. The thread will also exit\n\
when the function raises an unhandled exception; a stack trace will be\n\
printed unless the exception is SystemExit.\n");
Start a new thread and return its identifier.\n\
\n\
The thread will call the function with positional arguments from the\n\
tuple args and keyword arguments taken from the optional dictionary\n\
kwargs. The thread exits when the function returns; the return value\n\
is ignored. The thread will also exit when the function raises an\n\
unhandled exception; a stack trace will be printed unless the exception\n\
is SystemExit.\n");
static PyObject *
thread_PyThread_start_joinable_thread(PyObject *module, PyObject *func)
{
thread_module_state *state = get_thread_state(module);
if (!PyCallable_Check(func)) {
PyErr_SetString(PyExc_TypeError,
"thread function must be callable");
return NULL;
}
if (PySys_Audit("_thread.start_joinable_thread", "O", func) < 0) {
return NULL;
}
PyObject* args = PyTuple_New(0);
if (args == NULL) {
return NULL;
}
ThreadHandleObject* hobj = new_thread_handle(state);
if (hobj == NULL) {
Py_DECREF(args);
return NULL;
}
if (do_start_new_thread(state, func, args, /*kwargs=*/ NULL, /*joinable=*/ 1,
&hobj->ident, &hobj->handle)) {
Py_DECREF(args);
Py_DECREF(hobj);
return NULL;
}
Py_DECREF(args);
hobj->joinable = 1;
return (PyObject*) hobj;
}
PyDoc_STRVAR(start_joinable_doc,
"start_joinable_thread(function)\n\
\n\
*For internal use only*: start a new thread.\n\
\n\
Like start_new_thread(), this starts a new thread calling the given function.\n\
Unlike start_new_thread(), this returns a handle object with methods to join\n\
or detach the given thread.\n\
This function is not for third-party code, please use the\n\
`threading` module instead.\n");
static PyObject *
thread_PyThread_exit_thread(PyObject *self, PyObject *Py_UNUSED(ignored))
@ -1248,12 +1462,12 @@ information about locks.");
static PyObject *
thread_get_ident(PyObject *self, PyObject *Py_UNUSED(ignored))
{
unsigned long ident = PyThread_get_thread_ident();
PyThread_ident_t ident = PyThread_get_thread_ident_ex();
if (ident == PYTHREAD_INVALID_THREAD_ID) {
PyErr_SetString(ThreadError, "no current thread ident");
return NULL;
}
return PyLong_FromUnsignedLong(ident);
return PyLong_FromUnsignedLongLong(ident);
}
PyDoc_STRVAR(get_ident_doc,
@ -1440,8 +1654,8 @@ thread_excepthook_file(PyObject *file, PyObject *exc_type, PyObject *exc_value,
Py_DECREF(name);
}
else {
unsigned long ident = PyThread_get_thread_ident();
PyObject *str = PyUnicode_FromFormat("%lu", ident);
PyThread_ident_t ident = PyThread_get_thread_ident_ex();
PyObject *str = PyUnicode_FromFormat("%" PY_FORMAT_THREAD_IDENT_T, ident);
if (str != NULL) {
if (PyFile_WriteObject(str, file, Py_PRINT_RAW) < 0) {
Py_DECREF(str);
@ -1574,6 +1788,8 @@ static PyMethodDef thread_methods[] = {
METH_VARARGS, start_new_doc},
{"start_new", (PyCFunction)thread_PyThread_start_new_thread,
METH_VARARGS, start_new_doc},
{"start_joinable_thread", (PyCFunction)thread_PyThread_start_joinable_thread,
METH_O, start_joinable_doc},
{"daemon_threads_allowed", (PyCFunction)thread_daemon_threads_allowed,
METH_NOARGS, daemon_threads_allowed_doc},
{"allocate_lock", thread_PyThread_allocate_lock,
@ -1617,6 +1833,15 @@ thread_module_exec(PyObject *module)
// Initialize the C thread library
PyThread_init_thread();
// _ThreadHandle
state->thread_handle_type = (PyTypeObject *)PyType_FromSpec(&ThreadHandle_Type_spec);
if (state->thread_handle_type == NULL) {
return -1;
}
if (PyDict_SetItemString(d, "_ThreadHandle", (PyObject *)state->thread_handle_type) < 0) {
return -1;
}
// Lock
state->lock_type = (PyTypeObject *)PyType_FromSpec(&lock_type_spec);
if (state->lock_type == NULL) {
@ -1690,6 +1915,7 @@ thread_module_traverse(PyObject *module, visitproc visit, void *arg)
Py_VISIT(state->lock_type);
Py_VISIT(state->local_type);
Py_VISIT(state->local_dummy_type);
Py_VISIT(state->thread_handle_type);
return 0;
}
@ -1701,6 +1927,7 @@ thread_module_clear(PyObject *module)
Py_CLEAR(state->lock_type);
Py_CLEAR(state->local_type);
Py_CLEAR(state->local_dummy_type);
Py_CLEAR(state->thread_handle_type);
return 0;
}