gh-100228: Warn from os.fork() if other threads exist. (#100229)

This is a best-effort warning, not a comprehensive one. On some platforms threads can exist that this code cannot detect. macOS (when API permissions allow) and Linux (with a readable /proc procfs present) are the currently supported cases where the warning should show up reliably.

Starting with a DeprecationWarning for now: it is less disruptive than something like RuntimeWarning and is most likely to be seen only in people's CI tests - a good place to start with this messaging.
This commit is contained in:
Gregory P. Smith 2022-12-29 14:41:39 -08:00 committed by GitHub
parent 2df82db485
commit 894f2c3c16
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 283 additions and 66 deletions

View file

@ -72,6 +72,8 @@
*/
#if defined(__APPLE__)
#include <mach/mach.h>
#if defined(__has_builtin)
#if __has_builtin(__builtin_available)
#define HAVE_BUILTIN_AVAILABLE 1
@ -6745,6 +6747,104 @@ os_register_at_fork_impl(PyObject *module, PyObject *before,
}
#endif /* HAVE_FORK */
// Common code to raise a warning if we detect there is more than one thread
// running in the process. Best effort, silent if unable to count threads.
// Constraint: Quick. Never overcounts. Never leaves an error set.
//
// This code might do an import, thus acquiring the import lock, which
// PyOS_BeforeFork() also does. As this should only be called from
// the parent process, it is in the same thread so that works.
static void warn_about_fork_with_threads(const char* name) {
// TODO: Consider making an `os` module API to return the current number
// of threads in the process. That'd presumably use this platform code but
// raise an error rather than using the inaccurate fallback.
Py_ssize_t num_python_threads = 0;
#if defined(__APPLE__) && defined(HAVE_GETPID)
mach_port_t macos_self = mach_task_self();
mach_port_t macos_task;
if (task_for_pid(macos_self, getpid(), &macos_task) == KERN_SUCCESS) {
thread_array_t macos_threads;
mach_msg_type_number_t macos_n_threads;
if (task_threads(macos_task, &macos_threads,
&macos_n_threads) == KERN_SUCCESS) {
num_python_threads = macos_n_threads;
}
}
#elif defined(__linux__)
// Linux /proc/self/stat 20th field is the number of threads.
FILE* proc_stat = fopen("/proc/self/stat", "r");
if (proc_stat) {
size_t n;
// Size chosen arbitrarily. ~60% more bytes than a 20th column index
// observed on the author's workstation.
char stat_line[160];
n = fread(&stat_line, 1, 159, proc_stat);
stat_line[n] = '\0';
fclose(proc_stat);
char *saveptr = NULL;
char *field = strtok_r(stat_line, " ", &saveptr);
unsigned int idx;
for (idx = 19; idx && field; --idx) {
field = strtok_r(NULL, " ", &saveptr);
}
if (idx == 0 && field) { // found the 20th field
num_python_threads = atoi(field); // 0 on error
}
}
#endif
if (num_python_threads <= 0) {
// Fall back to just the number our threading module knows about.
// An incomplete view of the world, but better than nothing.
PyObject *threading = PyImport_GetModule(&_Py_ID(threading));
if (!threading) {
PyErr_Clear();
return;
}
PyObject *threading_active =
PyObject_GetAttr(threading, &_Py_ID(_active));
if (!threading_active) {
PyErr_Clear();
Py_DECREF(threading);
return;
}
PyObject *threading_limbo =
PyObject_GetAttr(threading, &_Py_ID(_limbo));
if (!threading_limbo) {
PyErr_Clear();
Py_DECREF(threading);
Py_DECREF(threading_active);
return;
}
Py_DECREF(threading);
// Duplicating what threading.active_count() does but without holding
// threading._active_limbo_lock so our count could be inaccurate if
// these dicts are mid-update from another thread. Not a big deal.
// Worst case if someone replaced threading._active or threading._limbo
// with non-dicts, we get -1 from *Length() below and undercount.
// Nobody should, but we're best effort so we clear errors and move on.
num_python_threads = (PyMapping_Length(threading_active)
+ PyMapping_Length(threading_limbo));
PyErr_Clear();
Py_DECREF(threading_active);
Py_DECREF(threading_limbo);
}
if (num_python_threads > 1) {
PyErr_WarnFormat(
PyExc_DeprecationWarning, 1,
#ifdef HAVE_GETPID
"This process (pid=%d) is multi-threaded, "
#else
"This process is multi-threaded, "
#endif
"use of %s() may lead to deadlocks in the child.",
#ifdef HAVE_GETPID
getpid(),
#endif
name);
PyErr_Clear();
}
}
#ifdef HAVE_FORK1
/*[clinic input]
@ -6771,6 +6871,7 @@ os_fork1_impl(PyObject *module)
/* child: this clobbers and resets the import lock. */
PyOS_AfterFork_Child();
} else {
warn_about_fork_with_threads("fork1");
/* parent: release the import lock. */
PyOS_AfterFork_Parent();
}
@ -6810,6 +6911,7 @@ os_fork_impl(PyObject *module)
/* child: this clobbers and resets the import lock. */
PyOS_AfterFork_Child();
} else {
warn_about_fork_with_threads("fork");
/* parent: release the import lock. */
PyOS_AfterFork_Parent();
}
@ -7479,6 +7581,7 @@ os_forkpty_impl(PyObject *module)
/* child: this clobbers and resets the import lock. */
PyOS_AfterFork_Child();
} else {
warn_about_fork_with_threads("forkpty");
/* parent: release the import lock. */
PyOS_AfterFork_Parent();
}