gh-132641: fix race in lru_cache under free-threading (#133787)

Fix a race in `lru_cache` by acquiring a critical section on the cache object itself and calling the lock-held variants of the dict functions that modify the underlying dict.
Author: Peter Hawkins
Date: 2025-05-13 13:38:57 -04:00 (committed by GitHub)
parent 35f47d0589
commit 9ad0c7b0f1
5 changed files with 94 additions and 5 deletions
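
As a rough illustration of the pattern the commit message describes: take the critical section on the cache dict, then call the lock-held variants of the dict API while it is held. This is a minimal sketch only, assuming a core-internal compilation unit (Py_BUILD_CORE); the helper name `cache_insert` is invented for illustration and is not code from this commit.

```c
#include "Python.h"
#include "pycore_dict.h"    // _PyDict_SetItem_KnownHash_LockHeld() (internal API)

/* Sketch: insert `value` under `key` (with precomputed `hash`) into the
   cache dict while holding its per-object lock. */
static int
cache_insert(PyObject *cache_dict, PyObject *key, Py_hash_t hash,
             PyObject *value)
{
    int res;
    /* On the free-threaded build this locks the dict; on the default
       build the macro is a no-op and the GIL provides exclusion. */
    Py_BEGIN_CRITICAL_SECTION(cache_dict);
    /* The dict's lock is already held here, so use the lock-held variant.
       Plain _PyDict_SetItem_KnownHash() would start its own critical
       section on the dict, and nested critical sections can suspend the
       outer one, reopening the race this commit closes. */
    res = _PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)cache_dict,
                                             key, value, hash);
    Py_END_CRITICAL_SECTION();
    return res;
}
```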

Include/internal/pycore_dict.h

@@ -150,6 +150,8 @@ extern int _PyDict_Pop_KnownHash(
     Py_hash_t hash,
     PyObject **result);
 
+extern void _PyDict_Clear_LockHeld(PyObject *op);
+
 #ifdef Py_GIL_DISABLED
 PyAPI_FUNC(void) _PyDict_EnsureSharedOnRead(PyDictObject *mp);
 #endif

Lib/test/test_free_threading/test_functools.py

@@ -0,0 +1,75 @@
import random
import unittest
from functools import lru_cache
from threading import Barrier, Thread

from test.support import threading_helper


@threading_helper.requires_working_threading()
class TestLRUCache(unittest.TestCase):

    def _test_concurrent_operations(self, maxsize):
        num_threads = 10
        b = Barrier(num_threads)

        @lru_cache(maxsize=maxsize)
        def func(arg=0):
            return object()

        def thread_func():
            b.wait()
            for i in range(1000):
                r = random.randint(0, 1000)
                if i < 800:
                    func(i)
                elif i < 900:
                    func.cache_info()
                else:
                    func.cache_clear()

        threads = []
        for i in range(num_threads):
            t = Thread(target=thread_func)
            threads.append(t)

        with threading_helper.start_threads(threads):
            pass

    def test_concurrent_operations_unbounded(self):
        self._test_concurrent_operations(maxsize=None)

    def test_concurrent_operations_bounded(self):
        self._test_concurrent_operations(maxsize=128)

    def _test_reentrant_cache_clear(self, maxsize):
        num_threads = 10
        b = Barrier(num_threads)

        @lru_cache(maxsize=maxsize)
        def func(arg=0):
            func.cache_clear()
            return object()

        def thread_func():
            b.wait()
            for i in range(1000):
                func(random.randint(0, 10000))

        threads = []
        for i in range(num_threads):
            t = Thread(target=thread_func)
            threads.append(t)

        with threading_helper.start_threads(threads):
            pass

    def test_reentrant_cache_clear_unbounded(self):
        self._test_reentrant_cache_clear(maxsize=None)

    def test_reentrant_cache_clear_bounded(self):
        self._test_reentrant_cache_clear(maxsize=128)


if __name__ == "__main__":
    unittest.main()

Misc/NEWS.d news entry

@@ -0,0 +1 @@
Fixed a race in :func:`functools.lru_cache` under free-threading.

Modules/_functoolsmodule.c

@@ -1383,8 +1383,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self,
            this same key, then this setitem call will update the cache dict
            with this new link, leaving the old link as an orphan (i.e. not
            having a cache dict entry that refers to it). */
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
+        if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key,
+                                               (PyObject *)link, hash) < 0) {
             Py_DECREF(link);
             return NULL;
         }
@@ -1453,8 +1453,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self,
            for successful insertion in the cache dict before adding the
            link to the linked list. Otherwise, the potentially reentrant
            __eq__ call could cause the then orphan link to be visited. */
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
+        if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key,
+                                               (PyObject *)link, hash) < 0) {
             /* Somehow the cache dict update failed. We no longer can
                restore the old link. Let the error propagate upward and
                leave the cache short one link. */
@@ -1689,7 +1689,13 @@ _functools__lru_cache_wrapper_cache_clear_impl(PyObject *self)
     lru_list_elem *list = lru_cache_unlink_list(_self);
     FT_ATOMIC_STORE_SSIZE_RELAXED(_self->hits, 0);
     FT_ATOMIC_STORE_SSIZE_RELAXED(_self->misses, 0);
-    PyDict_Clear(_self->cache);
+    if (_self->wrapper == bounded_lru_cache_wrapper) {
+        /* The critical section on the lru cache itself protects the dictionary
+           for bounded_lru_cache instances. */
+        _PyDict_Clear_LockHeld(_self->cache);
+    } else {
+        PyDict_Clear(_self->cache);
+    }
     lru_cache_clear_list(list);
     Py_RETURN_NONE;
 }

Objects/dictobject.c

@@ -2915,6 +2915,11 @@ clear_lock_held(PyObject *op)
     ASSERT_CONSISTENT(mp);
 }
 
+void
+_PyDict_Clear_LockHeld(PyObject *op) {
+    clear_lock_held(op);
+}
+
 void
 PyDict_Clear(PyObject *op)
 {