gh-115999: Add free-threaded specialization for FOR_ITER (#128798)

Add free-threaded versions of the existing specializations for FOR_ITER (lists, tuples, fast range iterators and generators), without significantly affecting their thread-safety. (Iterating over shared lists/tuples/ranges should be fine, like before. Reusing iterators between threads is not fine, like before. Sharing generators between threads is a recipe for significant crashes, like before.)
This commit is contained in:
T. Wouters 2025-03-12 16:21:46 +01:00 committed by GitHub
parent db27aee2fe
commit de2f7da77d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 469 additions and 125 deletions

View file

@ -5434,7 +5434,7 @@
iter = stack_pointer[-1];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -5444,7 +5444,7 @@
}
OPCODE_DEFERRED_INC(FOR_ITER);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}
// _FOR_ITER
{
@ -5514,6 +5514,17 @@
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#ifdef Py_GIL_DISABLED
// Since generators can't be used by multiple threads anyway we
// don't need to deopt here, but this lets us work on making
// generators thread-safe without necessarily having to
// specialize them thread-safely as well.
if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#endif
if (gen->gi_frame_state >= FRAME_EXECUTING) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
@ -5565,33 +5576,55 @@
// _ITER_CHECK_LIST
{
iter = stack_pointer[-1];
if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != &PyListIter_Type) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#ifdef Py_GIL_DISABLED
if (!_PyObject_IsUniquelyReferenced(iter_o)) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
_PyListIterObject *it = (_PyListIterObject *)iter_o;
if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) ||
!_PyObject_GC_IS_SHARED(it->it_seq)) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#endif
}
// _ITER_JUMP_LIST
{
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
_PyListIterObject *it = (_PyListIterObject *)iter_o;
assert(Py_TYPE(iter_o) == &PyListIter_Type);
// For free-threaded Python, the loop exit can happen at any point during
// item retrieval, so it doesn't make much sense to check and jump
// separately before item retrieval. Any length check we do here can be
// invalid by the time we actually try to fetch the item.
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced(iter_o));
(void)iter_o;
#else
_PyListIterObject *it = (_PyListIterObject *)iter_o;
STAT_INC(FOR_ITER, hit);
PyListObject *seq = it->it_seq;
if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) {
it->it_index = -1;
#ifndef Py_GIL_DISABLED
if (seq != NULL) {
it->it_seq = NULL;
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(seq);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
#endif
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(oparg + 1);
DISPATCH();
}
#endif
}
// _ITER_NEXT_LIST
{
@ -5600,8 +5633,32 @@
assert(Py_TYPE(iter_o) == &PyListIter_Type);
PyListObject *seq = it->it_seq;
assert(seq);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced(iter_o));
assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
_PyObject_GC_IS_SHARED(seq));
STAT_INC(FOR_ITER, hit);
_PyFrame_SetStackPointer(frame, stack_pointer);
int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
stack_pointer = _PyFrame_GetStackPointer(frame);
// A negative result means we lost a race with another thread
// and we need to take the slow path.
if (result < 0) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
if (result == 0) {
it->it_index = -1;
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(oparg + 1);
DISPATCH();
}
it->it_index++;
#else
assert(it->it_index < PyList_GET_SIZE(seq));
next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
#endif
}
stack_pointer[0] = next;
stack_pointer += 1;
@ -5632,11 +5689,21 @@
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#ifdef Py_GIL_DISABLED
if (!_PyObject_IsUniquelyReferenced((PyObject *)r)) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#endif
}
// _ITER_JUMP_RANGE
{
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(r) == &PyRangeIter_Type);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
#endif
STAT_INC(FOR_ITER, hit);
if (r->len <= 0) {
// Jump over END_FOR instruction.
@ -5648,6 +5715,9 @@
{
_PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter);
assert(Py_TYPE(r) == &PyRangeIter_Type);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced((PyObject *)r));
#endif
assert(r->len > 0);
long value = r->start;
r->start = value + r->step;
@ -5681,26 +5751,40 @@
// _ITER_CHECK_TUPLE
{
iter = stack_pointer[-1];
if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != &PyTupleIter_Type) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#ifdef Py_GIL_DISABLED
if (!_PyObject_IsUniquelyReferenced(iter_o)) {
UPDATE_MISS_STATS(FOR_ITER);
assert(_PyOpcode_Deopt[opcode] == (FOR_ITER));
JUMP_TO_PREDICTED(FOR_ITER);
}
#endif
}
// _ITER_JUMP_TUPLE
{
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
_PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
(void)iter_o;
assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced(iter_o));
#endif
_PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
STAT_INC(FOR_ITER, hit);
PyTupleObject *seq = it->it_seq;
if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) {
if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) {
#ifndef Py_GIL_DISABLED
if (seq != NULL) {
it->it_seq = NULL;
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(seq);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
#endif
/* Jump forward oparg, then skip following END_FOR instruction */
JUMPBY(oparg + 1);
DISPATCH();
@ -5712,6 +5796,9 @@
_PyTupleIterObject *it = (_PyTupleIterObject *)iter_o;
assert(Py_TYPE(iter_o) == &PyTupleIter_Type);
PyTupleObject *seq = it->it_seq;
#ifdef Py_GIL_DISABLED
assert(_PyObject_IsUniquelyReferenced(iter_o));
#endif
assert(seq);
assert(it->it_index < PyTuple_GET_SIZE(seq));
next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++));