gh-106529: Make FOR_ITER a viable uop (#112134)

This uses the new mechanism whereby certain uops
are replaced by others during translation,
using the `_PyUop_Replacements` table.
We further special-case the `_FOR_ITER_TIER_TWO` uop
to update the deoptimization target to point
just past the corresponding `END_FOR` opcode.

Two tiny code cleanups are also part of this PR.
This commit is contained in:
Guido van Rossum 2023-11-20 10:08:53 -08:00 committed by GitHub
parent d59feb5dbe
commit 1995955173
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 138 additions and 43 deletions

View file

@ -81,45 +81,46 @@
#define _IS_NONE 353 #define _IS_NONE 353
#define _SPECIALIZE_FOR_ITER 354 #define _SPECIALIZE_FOR_ITER 354
#define _FOR_ITER 355 #define _FOR_ITER 355
#define _ITER_CHECK_LIST 356 #define _FOR_ITER_TIER_TWO 356
#define _ITER_JUMP_LIST 357 #define _ITER_CHECK_LIST 357
#define _GUARD_NOT_EXHAUSTED_LIST 358 #define _ITER_JUMP_LIST 358
#define _ITER_NEXT_LIST 359 #define _GUARD_NOT_EXHAUSTED_LIST 359
#define _ITER_CHECK_TUPLE 360 #define _ITER_NEXT_LIST 360
#define _ITER_JUMP_TUPLE 361 #define _ITER_CHECK_TUPLE 361
#define _GUARD_NOT_EXHAUSTED_TUPLE 362 #define _ITER_JUMP_TUPLE 362
#define _ITER_NEXT_TUPLE 363 #define _GUARD_NOT_EXHAUSTED_TUPLE 363
#define _ITER_CHECK_RANGE 364 #define _ITER_NEXT_TUPLE 364
#define _ITER_JUMP_RANGE 365 #define _ITER_CHECK_RANGE 365
#define _GUARD_NOT_EXHAUSTED_RANGE 366 #define _ITER_JUMP_RANGE 366
#define _ITER_NEXT_RANGE 367 #define _GUARD_NOT_EXHAUSTED_RANGE 367
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 #define _ITER_NEXT_RANGE 368
#define _GUARD_KEYS_VERSION 369 #define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369
#define _LOAD_ATTR_METHOD_WITH_VALUES 370 #define _GUARD_KEYS_VERSION 370
#define _LOAD_ATTR_METHOD_NO_DICT 371 #define _LOAD_ATTR_METHOD_WITH_VALUES 371
#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 #define _LOAD_ATTR_METHOD_NO_DICT 372
#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 #define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373
#define _CHECK_ATTR_METHOD_LAZY_DICT 374 #define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374
#define _LOAD_ATTR_METHOD_LAZY_DICT 375 #define _CHECK_ATTR_METHOD_LAZY_DICT 375
#define _SPECIALIZE_CALL 376 #define _LOAD_ATTR_METHOD_LAZY_DICT 376
#define _CALL 377 #define _SPECIALIZE_CALL 377
#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 #define _CALL 378
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 #define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379
#define _CHECK_PEP_523 380 #define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380
#define _CHECK_FUNCTION_EXACT_ARGS 381 #define _CHECK_PEP_523 381
#define _CHECK_STACK_SPACE 382 #define _CHECK_FUNCTION_EXACT_ARGS 382
#define _INIT_CALL_PY_EXACT_ARGS 383 #define _CHECK_STACK_SPACE 383
#define _PUSH_FRAME 384 #define _INIT_CALL_PY_EXACT_ARGS 384
#define _SPECIALIZE_BINARY_OP 385 #define _PUSH_FRAME 385
#define _BINARY_OP 386 #define _SPECIALIZE_BINARY_OP 386
#define _GUARD_IS_TRUE_POP 387 #define _BINARY_OP 387
#define _GUARD_IS_FALSE_POP 388 #define _GUARD_IS_TRUE_POP 388
#define _GUARD_IS_NONE_POP 389 #define _GUARD_IS_FALSE_POP 389
#define _GUARD_IS_NOT_NONE_POP 390 #define _GUARD_IS_NONE_POP 390
#define _JUMP_TO_TOP 391 #define _GUARD_IS_NOT_NONE_POP 391
#define _SAVE_RETURN_OFFSET 392 #define _JUMP_TO_TOP 392
#define _INSERT 393 #define _SAVE_RETURN_OFFSET 393
#define _CHECK_VALIDITY 394 #define _INSERT 394
#define _CHECK_VALIDITY 395
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#ifdef NEED_OPCODE_METADATA #ifdef NEED_OPCODE_METADATA
@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 1; return 1;
case _FOR_ITER: case _FOR_ITER:
return 1; return 1;
case _FOR_ITER_TIER_TWO:
return 1;
case FOR_ITER: case FOR_ITER:
return 1; return 1;
case INSTRUMENTED_FOR_ITER: case INSTRUMENTED_FOR_ITER:
@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1; return 1;
case _FOR_ITER: case _FOR_ITER:
return 2; return 2;
case _FOR_ITER_TIER_TWO:
return 2;
case FOR_ITER: case FOR_ITER:
return 2; return 2;
case INSTRUMENTED_FOR_ITER: case INSTRUMENTED_FOR_ITER:
@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
[GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
[_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG },
@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
[MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } },
[GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } },
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
[FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } },
[FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } },
[FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } },
[FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } },
@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
[_IS_NONE] = "_IS_NONE", [_IS_NONE] = "_IS_NONE",
[_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER",
[_FOR_ITER] = "_FOR_ITER", [_FOR_ITER] = "_FOR_ITER",
[_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO",
[_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
[_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST",
[_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST",

View file

@ -2808,6 +2808,36 @@ class TestUops(unittest.TestCase):
uops = {opname for opname, _, _ in ex} uops = {opname for opname, _, _ in ex}
self.assertIn("_GUARD_IS_FALSE_POP", uops) self.assertIn("_GUARD_IS_FALSE_POP", uops)
def test_for_iter_tier_two(self):
class MyIter:
def __init__(self, n):
self.n = n
def __iter__(self):
return self
def __next__(self):
self.n -= 1
if self.n < 0:
raise StopIteration
return self.n
def testfunc(n, m):
x = 0
for i in range(m):
for j in MyIter(n):
x += 1000*i + j
return x
opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
x = testfunc(10, 10)
self.assertEqual(x, sum(range(10)) * 10010)
ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_FOR_ITER_TIER_TWO", uops)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View file

@ -0,0 +1 @@
Enable translating unspecialized ``FOR_ITER`` to Tier 2.

View file

@ -624,6 +624,12 @@
break; break;
} }
case _FOR_ITER_TIER_TWO: {
STACK_GROW(1);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
break;
}
case _ITER_CHECK_LIST: { case _ITER_CHECK_LIST: {
break; break;
} }

View file

@ -2513,7 +2513,7 @@ dummy_func(
#endif /* ENABLE_SPECIALIZATION */ #endif /* ENABLE_SPECIALIZATION */
} }
op(_FOR_ITER, (iter -- iter, next)) { replaced op(_FOR_ITER, (iter -- iter, next)) {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter); next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) { if (next == NULL) {
@ -2536,6 +2536,25 @@ dummy_func(
// Common case: no jump, leave it to the code generator // Common case: no jump, leave it to the code generator
} }
op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) {
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
}
_PyErr_Clear(tstate);
}
/* iterator ended normally */
Py_DECREF(iter);
STACK_SHRINK(1);
/* The translator sets the deopt target just past END_FOR */
DEOPT_IF(true);
}
// Common case: no jump, leave it to the code generator
}
macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER;
inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) {

View file

@ -1074,7 +1074,7 @@ deoptimize:
UOP_STAT_INC(opcode, miss); UOP_STAT_INC(opcode, miss);
frame->return_offset = 0; // Dispatch to frame->instr_ptr frame->return_offset = 0; // Dispatch to frame->instr_ptr
_PyFrame_SetStackPointer(frame, stack_pointer); _PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
Py_DECREF(current_executor); Py_DECREF(current_executor);
// Fall through // Fall through
// Jump here from ENTER_EXECUTOR // Jump here from ENTER_EXECUTOR
@ -1085,7 +1085,7 @@ enter_tier_one:
// Jump here from _EXIT_TRACE // Jump here from _EXIT_TRACE
exit_trace: exit_trace:
_PyFrame_SetStackPointer(frame, stack_pointer); _PyFrame_SetStackPointer(frame, stack_pointer);
frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
Py_DECREF(current_executor); Py_DECREF(current_executor);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
goto enter_tier_one; goto enter_tier_one;

View file

@ -2101,6 +2101,31 @@
break; break;
} }
case _FOR_ITER_TIER_TWO: {
PyObject *iter;
PyObject *next;
iter = stack_pointer[-1];
/* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */
next = (*Py_TYPE(iter)->tp_iternext)(iter);
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
}
_PyErr_Clear(tstate);
}
/* iterator ended normally */
Py_DECREF(iter);
STACK_SHRINK(1);
/* The translator sets the deopt target just past END_FOR */
DEOPT_IF(true, _FOR_ITER_TIER_TWO);
}
// Common case: no jump, leave it to the code generator
STACK_GROW(1);
stack_pointer[-1] = next;
break;
}
case _ITER_CHECK_LIST: { case _ITER_CHECK_LIST: {
PyObject *iter; PyObject *iter;
iter = stack_pointer[-1]; iter = stack_pointer[-1];

View file

@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = {
[_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE,
[_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
[_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
[_FOR_ITER] = _FOR_ITER_TIER_TWO,
}; };
static const uint16_t static const uint16_t
@ -620,6 +621,11 @@ top: // Jump here after _PUSH_FRAME or likely branches
} }
if (_PyUop_Replacements[uop]) { if (_PyUop_Replacements[uop]) {
uop = _PyUop_Replacements[uop]; uop = _PyUop_Replacements[uop];
if (uop == _FOR_ITER_TIER_TWO) {
target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1;
assert(_PyCode_CODE(code)[target-1].op.code == END_FOR ||
_PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR);
}
} }
break; break;
case OPARG_CACHE_1: case OPARG_CACHE_1: