gh-115999: Make list and tuple iteration more thread-safe. (#128637)

Make tuple iteration more thread-safe, and actually test concurrent iteration of tuple, range and list. (This is prep work for enabling specialization of FOR_ITER in free-threaded builds.) The basic premise is:

Iterating over a shared iterable (list, tuple or range) should be safe, not involve data races, and behave like iteration normally does.

Using a shared iterator should not crash or involve data races, and should only produce items regular iteration would produce. It is not guaranteed to produce all items, or produce each item only once. (This is not the case for range iteration even after this PR.)

Providing stronger guarantees is possible for some of these iterators, but it's not always straightforward and can significantly hamper the common case. Since iterators in general aren't shared between threads, and many kinds of iterator (like generators) simply can't be used concurrently at all, it's better to make sharing iterators without explicit synchronization clearly wrong.
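
To make the premise concrete, here is a minimal sketch of the kind of check the new free-threading tests perform. The helper names, thread counts and sizes are illustrative; this is not the PR's actual test code, and on builds with the GIL both checks are trivially satisfied.

import threading

def check_shared_iterable(iterable, nthreads=4):
    # Every thread creates its own iterator over the same shared object;
    # each must observe exactly what single-threaded iteration observes.
    expected = list(iterable)
    barrier = threading.Barrier(nthreads)
    results = []

    def worker():
        barrier.wait()
        results.append(list(iterable))

    threads = [threading.Thread(target=worker) for _ in range(nthreads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert all(r == expected for r in results)

def check_shared_iterator(iterable, nthreads=4):
    # All threads pull from one iterator: items may be skipped or seen
    # more than once, but nothing may crash and nothing outside the
    # iterable may ever be produced.
    allowed = set(iterable)
    it = iter(iterable)
    barrier = threading.Barrier(nthreads)
    seen = []

    def worker():
        barrier.wait()
        for item in it:
            seen.append(item)

    threads = [threading.Thread(target=worker) for _ in range(nthreads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert all(item in allowed for item in seen)

check_shared_iterable(tuple(range(1000)))
check_shared_iterator(list(range(1000)))

The first helper asserts full, ordered iteration per thread; the second only asserts memory safety and that no foreign items show up, matching the weaker guarantee for shared iterators described above.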

Specific issues fixed in order to make the tests pass:

 - List iteration could occasionally fail an assertion when a shared list was shrunk and an item past the new end was retrieved concurrently. There is still some unsafety when deleting or inserting multiple items through, for example, slice assignment, which uses memmove/memcpy. (A Python-level sketch of the shrink-while-iterating scenario follows this list.)

 - Tuple iteration could occasionally crash when the iterator's reference to the tuple was cleared on exhaustion. As with list iteration, in free-threaded builds we can't safely and efficiently clear the iterator's reference to the iterable (doing it safely would mean extra, slow refcount operations), so we just keep the iterable reference around. (A sketch of the concurrent-exhaustion scenario also follows this list.)
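
A rough Python-level sketch of the first scenario, as referenced above: one thread keeps chopping the tail off a shared list and growing it back while other threads iterate it. The failing assert was a C-level debug assertion in list_get_item_ref, so the failure was only visible in debug, free-threaded builds; the helper name and sizes here are made up for illustration.

import threading

def shrink_while_iterating(nreaders=4, rounds=100):
    shared = list(range(200))
    stop = threading.Event()

    def writer():
        # Repeatedly delete the tail of the shared list and grow it back,
        # racing against the readers below.
        while not stop.is_set():
            del shared[20:]
            shared.extend(range(20, 200))

    def reader():
        for _ in range(rounds):
            for item in shared:
                # Only values the list can legitimately contain may appear.
                assert 0 <= item < 200

    wthread = threading.Thread(target=writer)
    rthreads = [threading.Thread(target=reader) for _ in range(nreaders)]
    wthread.start()
    for t in rthreads:
        t.start()
    for t in rthreads:
        t.join()
    stop.set()
    wthread.join()

shrink_while_iterating()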
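
And a sketch of the second scenario: several threads drain one shared tuple iterator, so they race to take the last item and, before this change, to clear and decref the iterator's reference to the tuple on exhaustion. Again the helper and its parameters are illustrative, not the PR's test code.

import threading

def drain_shared_tuple_iterator(nthreads=8, rounds=200):
    data = tuple(range(1000))
    for _ in range(rounds):
        it = iter(data)  # a single iterator shared by every thread
        barrier = threading.Barrier(nthreads)

        def worker():
            barrier.wait()
            # Racing threads exhaust the iterator together; only items
            # from the tuple may ever be produced.
            for item in it:
                assert 0 <= item < 1000

        threads = [threading.Thread(target=worker) for _ in range(nthreads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

drain_shared_tuple_iterator()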
Author: T. Wouters
Date:   2025-02-18 16:52:46 -08:00 (committed by GitHub)
Parent: 736ad664e0
Commit: 388e1ca9f0
4 changed files with 177 additions and 18 deletions

Objects/listobject.c

@@ -357,7 +357,7 @@ list_get_item_ref(PyListObject *op, Py_ssize_t i)
return NULL;
}
Py_ssize_t cap = list_capacity(ob_item);
assert(cap != -1 && cap >= size);
assert(cap != -1);
if (!valid_index(i, cap)) {
return NULL;
}
@@ -784,7 +784,8 @@ list_repeat_lock_held(PyListObject *a, Py_ssize_t n)
_Py_RefcntAdd(*src, n);
*dest++ = *src++;
}
// TODO: _Py_memory_repeat calls are not safe for shared lists in
// GIL_DISABLED builds. (See issue #129069)
_Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
sizeof(PyObject *)*input_size);
}
@@ -919,6 +920,8 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO
if (d < 0) { /* Delete -d items */
Py_ssize_t tail;
tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
// TODO: these memmove/memcpy calls are not safe for shared lists in
// GIL_DISABLED builds. (See issue #129069)
memmove(&item[ihigh+d], &item[ihigh], tail);
if (list_resize(a, Py_SIZE(a) + d) < 0) {
memmove(&item[ihigh], &item[ihigh+d], tail);
@@ -932,12 +935,14 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO
if (list_resize(a, k+d) < 0)
goto Error;
item = a->ob_item;
// TODO: these memmove/memcpy calls are not safe for shared lists in
// GIL_DISABLED builds. (See issue #129069)
memmove(&item[ihigh+d], &item[ihigh],
(k - ihigh)*sizeof(PyObject *));
}
for (k = 0; k < n; k++, ilow++) {
PyObject *w = vitem[k];
item[ilow] = Py_XNewRef(w);
FT_ATOMIC_STORE_PTR_RELEASE(item[ilow], Py_XNewRef(w));
}
for (k = norig - 1; k >= 0; --k)
Py_XDECREF(recycle[k]);
@@ -1017,6 +1022,8 @@ list_inplace_repeat_lock_held(PyListObject *self, Py_ssize_t n)
for (Py_ssize_t j = 0; j < input_size; j++) {
_Py_RefcntAdd(items[j], n-1);
}
// TODO: _Py_memory_repeat calls are not safe for shared lists in
// GIL_DISABLED builds. (See issue #129069)
_Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size,
sizeof(PyObject *)*input_size);
return 0;
@@ -3993,7 +4000,7 @@ listiter_setstate(PyObject *self, PyObject *state)
index = -1;
else if (index > PyList_GET_SIZE(it->it_seq))
index = PyList_GET_SIZE(it->it_seq); /* iterator exhausted */
it->it_index = index;
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, index);
}
Py_RETURN_NONE;
}
@@ -4145,7 +4152,7 @@ listreviter_setstate(PyObject *self, PyObject *state)
index = -1;
else if (index > PyList_GET_SIZE(it->it_seq) - 1)
index = PyList_GET_SIZE(it->it_seq) - 1;
it->it_index = index;
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, index);
}
Py_RETURN_NONE;
}
@@ -4162,18 +4169,19 @@ listiter_reduce_general(void *_it, int forward)
* call must be before access of iterator pointers.
* see issue #101765 */
/* the objects are not the same, index is of different types! */
if (forward) {
iter = _PyEval_GetBuiltin(&_Py_ID(iter));
_PyListIterObject *it = (_PyListIterObject *)_it;
if (it->it_index >= 0) {
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
if (idx >= 0) {
return Py_BuildValue("N(O)n", iter, it->it_seq, idx);
}
} else {
iter = _PyEval_GetBuiltin(&_Py_ID(reversed));
listreviterobject *it = (listreviterobject *)_it;
if (it->it_index >= 0) {
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
if (idx >= 0) {
return Py_BuildValue("N(O)n", iter, it->it_seq, idx);
}
}
/* empty iterator, create an empty list */

Objects/tupleobject.c

@@ -1014,18 +1014,23 @@ tupleiter_next(PyObject *self)
assert(it != NULL);
seq = it->it_seq;
#ifndef Py_GIL_DISABLED
if (seq == NULL)
return NULL;
#endif
assert(PyTuple_Check(seq));
if (it->it_index < PyTuple_GET_SIZE(seq)) {
item = PyTuple_GET_ITEM(seq, it->it_index);
++it->it_index;
Py_ssize_t index = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
if (index < PyTuple_GET_SIZE(seq)) {
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, index + 1);
item = PyTuple_GET_ITEM(seq, index);
return Py_NewRef(item);
}
#ifndef Py_GIL_DISABLED
it->it_seq = NULL;
Py_DECREF(seq);
#endif
return NULL;
}
@@ -1034,8 +1039,15 @@ tupleiter_len(PyObject *self, PyObject *Py_UNUSED(ignored))
{
_PyTupleIterObject *it = _PyTupleIterObject_CAST(self);
Py_ssize_t len = 0;
#ifdef Py_GIL_DISABLED
Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
Py_ssize_t seq_len = PyTuple_GET_SIZE(it->it_seq);
if (idx < seq_len)
len = seq_len - idx;
#else
if (it->it_seq)
len = PyTuple_GET_SIZE(it->it_seq) - it->it_index;
#endif
return PyLong_FromSsize_t(len);
}
@@ -1051,10 +1063,15 @@ tupleiter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored))
* see issue #101765 */
_PyTupleIterObject *it = _PyTupleIterObject_CAST(self);
#ifdef Py_GIL_DISABLED
Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index);
if (idx < PyTuple_GET_SIZE(it->it_seq))
return Py_BuildValue("N(O)n", iter, it->it_seq, idx);
#else
if (it->it_seq)
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", iter);
#endif
return Py_BuildValue("N(())", iter);
}
static PyObject *
@@ -1069,7 +1086,7 @@ tupleiter_setstate(PyObject *self, PyObject *state)
index = 0;
else if (index > PyTuple_GET_SIZE(it->it_seq))
index = PyTuple_GET_SIZE(it->it_seq); /* exhausted iterator */
it->it_index = index;
FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, index);
}
Py_RETURN_NONE;
}