[3.11] gh-106883 Fix deadlock in threaded application (#117332)

When using threaded applications, there is a high risk of a deadlock in
the interpreter. It's a lock ordering deadlock with HEAD_LOCK(&_PyRuntime); and the GIL.

By disabling the GC during the _PyThread_CurrentFrames() and
_PyThread_CurrentExceptions() calls fixes the issue.
This commit is contained in:
Diego Russo 2025-03-11 15:31:03 +00:00 committed by GitHub
parent f1897d312e
commit 6b37486184
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 93 additions and 0 deletions

View file

@ -14,6 +14,7 @@ from test.support import os_helper
from test.support.script_helper import assert_python_ok, assert_python_failure
from test.support import threading_helper
from test.support import import_helper
from test.support import skip_if_sanitizer
import textwrap
import unittest
import warnings
@ -471,6 +472,79 @@ class SysModuleTest(unittest.TestCase):
leave_g.set()
t.join()
@skip_if_sanitizer(memory=True, address=True, reason= "Test too slow "
"when the address sanitizer is enabled.")
@threading_helper.reap_threads
@threading_helper.requires_working_threading()
@support.requires_fork()
def test_current_frames_exceptions_deadlock(self):
"""
Reproduce the bug raised in GH-106883 and GH-116969.
"""
import threading
import time
import signal
class MockObject:
def __init__(self):
# Create some garbage
self._list = list(range(10000))
# Call the functions under test
self._trace = sys._current_frames()
self._exceptions = sys._current_exceptions()
def __del__(self):
# The presence of the __del__ method causes the deadlock when
# there is one thread executing the _current_frames or
# _current_exceptions functions and the other thread is
# running the GC:
# thread 1 has the interpreter lock and it is trying to
# acquire the GIL; thread 2 holds the GIL but is trying to
# acquire the interpreter lock.
# When the GC is running and it finds that an
# object has the __del__ method, it needs to execute the
# Python code in it and it requires the GIL to execute it
# (which will never happen because it is held by another thread
# blocked on the acquisition of the interpreter lock)
pass
def thread_function(num_objects):
obj = None
for _ in range(num_objects):
obj = MockObject()
# The number of objects should be big enough to increase the
# chances to call the GC.
NUM_OBJECTS = 1000
NUM_THREADS = 10
# 40 seconds should be enough for the test to be executed: if it
# is more than 40 seconds it means that the process is in deadlock
# hence the test fails
TIMEOUT = 40
# Test the sys._current_frames and sys._current_exceptions calls
pid = os.fork()
if pid: # parent process
try:
support.wait_process(pid, exitcode=0, timeout=TIMEOUT)
except KeyboardInterrupt:
# When pressing CTRL-C kill the deadlocked process
os.kill(pid, signal.SIGTERM)
raise
else: # child process
# Run the actual test in the forked process.
threads = []
for i in range(NUM_THREADS):
thread = threading.Thread(
target=thread_function, args=(NUM_OBJECTS,)
)
threads.append(thread)
thread.start()
for t in threads:
t.join()
os._exit(0)
@threading_helper.reap_threads
@threading_helper.requires_working_threading()
def test_current_exceptions(self):

View file

@ -0,0 +1 @@
Disable GC during the _PyThread_CurrentFrames() and _PyThread_CurrentExceptions() calls to avoid the interpreter to deadlock.

View file

@ -1398,6 +1398,9 @@ _PyThread_CurrentFrames(void)
return NULL;
}
// gh-106883: Disable the GC as this can cause the interpreter to deadlock
int gc_was_enabled = PyGC_Disable();
/* for i in all interpreters:
* for t in all of i's thread states:
* if t's frame isn't NULL, map t's id to its frame
@ -1440,6 +1443,12 @@ fail:
done:
HEAD_UNLOCK(runtime);
// Once the runtime is released, the GC can be reenabled.
if (gc_was_enabled) {
PyGC_Enable();
}
return result;
}
@ -1459,6 +1468,9 @@ _PyThread_CurrentExceptions(void)
return NULL;
}
// gh-106883: Disable the GC as this can cause the interpreter to deadlock
int gc_was_enabled = PyGC_Disable();
/* for i in all interpreters:
* for t in all of i's thread states:
* if t's frame isn't NULL, map t's id to its frame
@ -1499,6 +1511,12 @@ fail:
done:
HEAD_UNLOCK(runtime);
// Once the runtime is released, the GC can be reenabled.
if (gc_was_enabled) {
PyGC_Enable();
}
return result;
}