mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-114271: Fix race in Thread.join() (#114839)
There is a race between when `Thread._tstate_lock` is released[^1] in `Thread._wait_for_tstate_lock()` and when `Thread._stop()` asserts[^2] that it is unlocked. Consider the following execution involving threads A, B, and C:

1. A starts.
2. B joins A, blocking on its `_tstate_lock`.
3. C joins A, blocking on its `_tstate_lock`.
4. A finishes and releases its `_tstate_lock`.
5. B acquires A's `_tstate_lock` in `_wait_for_tstate_lock()`, releases it, but is swapped out before calling `_stop()`.
6. C is scheduled, acquires A's `_tstate_lock` in `_wait_for_tstate_lock()` but is swapped out before releasing it.
7. B is scheduled, calls `_stop()`, which asserts that A's `_tstate_lock` is not held. However, C holds it, so the assertion fails.

The race can be reproduced[^3] by inserting sleeps at the appropriate points in the threading code. To do so, run the `repro_join_race.py` from the linked repo.

There are two main parts to this PR:

1. `_tstate_lock` is replaced with an event that is attached to `PyThreadState`. The event is set by the runtime prior to the thread being cleared (in the same place that `_tstate_lock` was released). `Thread.join()` blocks waiting for the event to be set.
2. `_PyInterpreterState_WaitForThreads()` provides the ability to wait for all non-daemon threads to exit. To do so, an `is_daemon` predicate was added to `PyThreadState`. This field is set each time a thread is created. `threading._shutdown()` now calls into `_PyInterpreterState_WaitForThreads()` instead of waiting on `_tstate_lock`s.

[^1]: 441affc9e7/Lib/threading.py (L1201)
[^2]: 441affc9e7/Lib/threading.py (L1115)
[^3]: 8194653279
---------

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
This commit is contained in:
parent
86bc40dd41
commit
33da0e844c
12 changed files with 771 additions and 643 deletions
|
@ -289,6 +289,54 @@ class ThreadRunningTests(BasicThreadTest):
|
|||
with self.assertRaisesRegex(RuntimeError, "Cannot join current thread"):
|
||||
raise error
|
||||
|
||||
def test_join_with_timeout(self):
|
||||
lock = thread.allocate_lock()
|
||||
lock.acquire()
|
||||
|
||||
def thr():
|
||||
lock.acquire()
|
||||
|
||||
with threading_helper.wait_threads_exit():
|
||||
handle = thread.start_joinable_thread(thr)
|
||||
handle.join(0.1)
|
||||
self.assertFalse(handle.is_done())
|
||||
lock.release()
|
||||
handle.join()
|
||||
self.assertTrue(handle.is_done())
|
||||
|
||||
def test_join_unstarted(self):
|
||||
handle = thread._ThreadHandle()
|
||||
with self.assertRaisesRegex(RuntimeError, "thread not started"):
|
||||
handle.join()
|
||||
|
||||
def test_set_done_unstarted(self):
|
||||
handle = thread._ThreadHandle()
|
||||
with self.assertRaisesRegex(RuntimeError, "thread not started"):
|
||||
handle._set_done()
|
||||
|
||||
def test_start_duplicate_handle(self):
|
||||
lock = thread.allocate_lock()
|
||||
lock.acquire()
|
||||
|
||||
def func():
|
||||
lock.acquire()
|
||||
|
||||
handle = thread._ThreadHandle()
|
||||
with threading_helper.wait_threads_exit():
|
||||
thread.start_joinable_thread(func, handle=handle)
|
||||
with self.assertRaisesRegex(RuntimeError, "thread already started"):
|
||||
thread.start_joinable_thread(func, handle=handle)
|
||||
lock.release()
|
||||
handle.join()
|
||||
|
||||
def test_start_with_none_handle(self):
|
||||
def func():
|
||||
pass
|
||||
|
||||
with threading_helper.wait_threads_exit():
|
||||
handle = thread.start_joinable_thread(func, handle=None)
|
||||
handle.join()
|
||||
|
||||
|
||||
class Barrier:
|
||||
def __init__(self, num_threads):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue