mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
GH-118095: Add dynamic exit support and FOR_ITER_GEN support to tier 2 (GH-118279)
This commit is contained in:
parent
63add11704
commit
3e06c7f719
12 changed files with 315 additions and 139 deletions
|
@ -1109,6 +1109,10 @@ dummy_func(
|
|||
_PyFrame_StackPush(frame, retval);
|
||||
/* We don't know which of these is relevant here, so keep them equal */
|
||||
assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
|
||||
assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
|
||||
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
|
||||
goto resume_frame;
|
||||
}
|
||||
|
@ -2759,24 +2763,26 @@ dummy_func(
|
|||
_ITER_JUMP_RANGE +
|
||||
_ITER_NEXT_RANGE;
|
||||
|
||||
inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
|
||||
DEOPT_IF(tstate->interp->eval_frame);
|
||||
op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) {
|
||||
PyGenObject *gen = (PyGenObject *)iter;
|
||||
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
|
||||
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
|
||||
STAT_INC(FOR_ITER, hit);
|
||||
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
|
||||
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
|
||||
_PyFrame_StackPush(gen_frame, Py_None);
|
||||
gen->gi_frame_state = FRAME_EXECUTING;
|
||||
gen->gi_exc_state.previous_item = tstate->exc_info;
|
||||
tstate->exc_info = &gen->gi_exc_state;
|
||||
assert(next_instr[oparg].op.code == END_FOR ||
|
||||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
|
||||
assert(next_instr - this_instr + oparg <= UINT16_MAX);
|
||||
frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
|
||||
DISPATCH_INLINED(gen_frame);
|
||||
// oparg is the return offset from the next instruction.
|
||||
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
|
||||
}
|
||||
|
||||
macro(FOR_ITER_GEN) =
|
||||
unused/1 +
|
||||
_CHECK_PEP_523 +
|
||||
_FOR_ITER_GEN_FRAME +
|
||||
_PUSH_FRAME;
|
||||
|
||||
inst(BEFORE_ASYNC_WITH, (mgr -- exit, res)) {
|
||||
PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__));
|
||||
if (enter == NULL) {
|
||||
|
@ -3166,10 +3172,7 @@ dummy_func(
|
|||
}
|
||||
}
|
||||
|
||||
// The 'unused' output effect represents the return value
|
||||
// (which will be pushed when the frame returns).
|
||||
// It is needed so CALL_PY_EXACT_ARGS matches its family.
|
||||
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) {
|
||||
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) {
|
||||
// Write it out explicitly because it's subtly different.
|
||||
// Eventually this should be the only occurrence of this code.
|
||||
assert(tstate->interp->eval_frame == NULL);
|
||||
|
@ -4189,6 +4192,38 @@ dummy_func(
|
|||
GOTO_TIER_TWO(executor);
|
||||
}
|
||||
|
||||
tier2 op(_DYNAMIC_EXIT, (--)) {
|
||||
tstate->previous_executor = (PyObject *)current_executor;
|
||||
_PyExitData *exit = (_PyExitData *)¤t_executor->exits[oparg];
|
||||
_Py_CODEUNIT *target = frame->instr_ptr;
|
||||
_PyExecutorObject *executor;
|
||||
if (target->op.code == ENTER_EXECUTOR) {
|
||||
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
|
||||
executor = code->co_executors->executors[target->op.arg];
|
||||
Py_INCREF(executor);
|
||||
}
|
||||
else {
|
||||
if (!backoff_counter_triggers(exit->temperature)) {
|
||||
exit->temperature = advance_backoff_counter(exit->temperature);
|
||||
GOTO_TIER_ONE(target);
|
||||
}
|
||||
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
|
||||
if (optimized <= 0) {
|
||||
exit->temperature = restart_backoff_counter(exit->temperature);
|
||||
if (optimized < 0) {
|
||||
Py_DECREF(current_executor);
|
||||
tstate->previous_executor = Py_None;
|
||||
GOTO_UNWIND();
|
||||
}
|
||||
GOTO_TIER_ONE(target);
|
||||
}
|
||||
else {
|
||||
exit->temperature = initial_temperature_backoff_counter();
|
||||
}
|
||||
}
|
||||
GOTO_TIER_TWO(executor);
|
||||
}
|
||||
|
||||
tier2 op(_START_EXECUTOR, (executor/4 --)) {
|
||||
Py_DECREF(tstate->previous_executor);
|
||||
tstate->previous_executor = NULL;
|
||||
|
@ -4222,6 +4257,7 @@ dummy_func(
|
|||
GOTO_UNWIND();
|
||||
}
|
||||
|
||||
|
||||
// END BYTECODES //
|
||||
|
||||
}
|
||||
|
|
|
@ -1072,9 +1072,13 @@ jump_to_jump_target:
|
|||
next_uop = current_executor->trace + target;
|
||||
goto tier2_dispatch;
|
||||
|
||||
exit_to_tier1_dynamic:
|
||||
next_instr = frame->instr_ptr;
|
||||
goto goto_to_tier1;
|
||||
exit_to_tier1:
|
||||
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
|
||||
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
|
||||
goto_to_tier1:
|
||||
#ifdef Py_DEBUG
|
||||
if (lltrace >= 2) {
|
||||
printf("DEOPT: [UOp ");
|
||||
|
|
|
@ -442,3 +442,4 @@ do { \
|
|||
#define GOTO_UNWIND() goto error_tier_two
|
||||
#define EXIT_TO_TRACE() goto exit_to_trace
|
||||
#define EXIT_TO_TIER1() goto exit_to_tier1
|
||||
#define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic;
|
||||
|
|
61
Python/executor_cases.c.h
generated
61
Python/executor_cases.c.h
generated
|
@ -2769,7 +2769,32 @@
|
|||
break;
|
||||
}
|
||||
|
||||
/* _FOR_ITER_GEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
|
||||
case _FOR_ITER_GEN_FRAME: {
|
||||
PyObject *iter;
|
||||
_PyInterpreterFrame *gen_frame;
|
||||
oparg = CURRENT_OPARG();
|
||||
iter = stack_pointer[-1];
|
||||
PyGenObject *gen = (PyGenObject *)iter;
|
||||
if (Py_TYPE(gen) != &PyGen_Type) {
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
if (gen->gi_frame_state >= FRAME_EXECUTING) {
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
STAT_INC(FOR_ITER, hit);
|
||||
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
|
||||
_PyFrame_StackPush(gen_frame, Py_None);
|
||||
gen->gi_frame_state = FRAME_EXECUTING;
|
||||
gen->gi_exc_state.previous_item = tstate->exc_info;
|
||||
tstate->exc_info = &gen->gi_exc_state;
|
||||
// oparg is the return offset from the next instruction.
|
||||
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
|
||||
stack_pointer[0] = (PyObject *)gen_frame;
|
||||
stack_pointer += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 because it has both popping and not-popping errors */
|
||||
|
||||
|
@ -4187,6 +4212,40 @@
|
|||
break;
|
||||
}
|
||||
|
||||
case _DYNAMIC_EXIT: {
|
||||
oparg = CURRENT_OPARG();
|
||||
tstate->previous_executor = (PyObject *)current_executor;
|
||||
_PyExitData *exit = (_PyExitData *)¤t_executor->exits[oparg];
|
||||
_Py_CODEUNIT *target = frame->instr_ptr;
|
||||
_PyExecutorObject *executor;
|
||||
if (target->op.code == ENTER_EXECUTOR) {
|
||||
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
|
||||
executor = code->co_executors->executors[target->op.arg];
|
||||
Py_INCREF(executor);
|
||||
}
|
||||
else {
|
||||
if (!backoff_counter_triggers(exit->temperature)) {
|
||||
exit->temperature = advance_backoff_counter(exit->temperature);
|
||||
GOTO_TIER_ONE(target);
|
||||
}
|
||||
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
|
||||
if (optimized <= 0) {
|
||||
exit->temperature = restart_backoff_counter(exit->temperature);
|
||||
if (optimized < 0) {
|
||||
Py_DECREF(current_executor);
|
||||
tstate->previous_executor = Py_None;
|
||||
GOTO_UNWIND();
|
||||
}
|
||||
GOTO_TIER_ONE(target);
|
||||
}
|
||||
else {
|
||||
exit->temperature = initial_temperature_backoff_counter();
|
||||
}
|
||||
}
|
||||
GOTO_TIER_TWO(executor);
|
||||
break;
|
||||
}
|
||||
|
||||
case _START_EXECUTOR: {
|
||||
PyObject *executor = (PyObject *)CURRENT_OPERAND();
|
||||
Py_DECREF(tstate->previous_executor);
|
||||
|
|
57
Python/generated_cases.c.h
generated
57
Python/generated_cases.c.h
generated
|
@ -2631,28 +2631,49 @@
|
|||
}
|
||||
|
||||
TARGET(FOR_ITER_GEN) {
|
||||
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
|
||||
frame->instr_ptr = next_instr;
|
||||
next_instr += 2;
|
||||
INSTRUCTION_STATS(FOR_ITER_GEN);
|
||||
static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size");
|
||||
PyObject *iter;
|
||||
_PyInterpreterFrame *gen_frame;
|
||||
_PyInterpreterFrame *new_frame;
|
||||
/* Skip 1 cache entry */
|
||||
// _CHECK_PEP_523
|
||||
{
|
||||
DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
|
||||
}
|
||||
// _FOR_ITER_GEN_FRAME
|
||||
iter = stack_pointer[-1];
|
||||
DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
|
||||
PyGenObject *gen = (PyGenObject *)iter;
|
||||
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
|
||||
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
|
||||
STAT_INC(FOR_ITER, hit);
|
||||
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
|
||||
_PyFrame_StackPush(gen_frame, Py_None);
|
||||
gen->gi_frame_state = FRAME_EXECUTING;
|
||||
gen->gi_exc_state.previous_item = tstate->exc_info;
|
||||
tstate->exc_info = &gen->gi_exc_state;
|
||||
assert(next_instr[oparg].op.code == END_FOR ||
|
||||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
|
||||
assert(next_instr - this_instr + oparg <= UINT16_MAX);
|
||||
frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
|
||||
DISPATCH_INLINED(gen_frame);
|
||||
{
|
||||
PyGenObject *gen = (PyGenObject *)iter;
|
||||
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
|
||||
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
|
||||
STAT_INC(FOR_ITER, hit);
|
||||
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
|
||||
_PyFrame_StackPush(gen_frame, Py_None);
|
||||
gen->gi_frame_state = FRAME_EXECUTING;
|
||||
gen->gi_exc_state.previous_item = tstate->exc_info;
|
||||
tstate->exc_info = &gen->gi_exc_state;
|
||||
// oparg is the return offset from the next instruction.
|
||||
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
|
||||
}
|
||||
// _PUSH_FRAME
|
||||
new_frame = gen_frame;
|
||||
{
|
||||
// Write it out explicitly because it's subtly different.
|
||||
// Eventually this should be the only occurrence of this code.
|
||||
assert(tstate->interp->eval_frame == NULL);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
new_frame->previous = frame;
|
||||
CALL_STAT_INC(inlined_py_calls);
|
||||
frame = tstate->current_frame = new_frame;
|
||||
tstate->py_recursion_remaining--;
|
||||
LOAD_SP();
|
||||
LOAD_IP(0);
|
||||
LLTRACE_RESUME_FRAME();
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
TARGET(FOR_ITER_LIST) {
|
||||
|
@ -6011,6 +6032,10 @@
|
|||
_PyFrame_StackPush(frame, retval);
|
||||
/* We don't know which of these is relevant here, so keep them equal */
|
||||
assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
|
||||
assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
|
||||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
|
||||
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
|
||||
goto resume_frame;
|
||||
}
|
||||
|
|
|
@ -567,8 +567,6 @@ translate_bytecode_to_trace(
|
|||
top: // Jump here after _PUSH_FRAME or likely branches
|
||||
for (;;) {
|
||||
target = INSTR_IP(instr, code);
|
||||
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
|
||||
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
|
||||
// Need space for _DEOPT
|
||||
max_length--;
|
||||
|
||||
|
@ -597,6 +595,8 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
|||
}
|
||||
}
|
||||
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
|
||||
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
|
||||
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
|
||||
|
||||
/* Special case the first instruction,
|
||||
* so that we can guarantee forward progress */
|
||||
|
@ -814,6 +814,12 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
|||
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
||||
goto done;
|
||||
}
|
||||
if (opcode == FOR_ITER_GEN) {
|
||||
DPRINTF(2, "Bailing due to dynamic target\n");
|
||||
ADD_TO_TRACE(uop, oparg, 0, target);
|
||||
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
|
||||
goto done;
|
||||
}
|
||||
// Increment IP to the return address
|
||||
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
|
||||
TRACE_STACK_PUSH();
|
||||
|
@ -847,7 +853,7 @@ top: // Jump here after _PUSH_FRAME or likely branches
|
|||
}
|
||||
DPRINTF(2, "Bail, new_code == NULL\n");
|
||||
ADD_TO_TRACE(uop, oparg, 0, target);
|
||||
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
||||
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -917,7 +923,7 @@ count_exits(_PyUOpInstruction *buffer, int length)
|
|||
int exit_count = 0;
|
||||
for (int i = 0; i < length; i++) {
|
||||
int opcode = buffer[i].opcode;
|
||||
if (opcode == _SIDE_EXIT) {
|
||||
if (opcode == _SIDE_EXIT || opcode == _DYNAMIC_EXIT) {
|
||||
exit_count++;
|
||||
}
|
||||
}
|
||||
|
@ -1114,6 +1120,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
|
|||
dest->format = UOP_FORMAT_EXIT;
|
||||
next_exit--;
|
||||
}
|
||||
if (opcode == _DYNAMIC_EXIT) {
|
||||
executor->exits[next_exit].target = 0;
|
||||
dest->oparg = next_exit;
|
||||
next_exit--;
|
||||
}
|
||||
}
|
||||
assert(next_exit == -1);
|
||||
assert(dest == executor->trace);
|
||||
|
|
13
Python/optimizer_cases.c.h
generated
13
Python/optimizer_cases.c.h
generated
|
@ -1439,7 +1439,14 @@
|
|||
break;
|
||||
}
|
||||
|
||||
/* _FOR_ITER_GEN is not a viable micro-op for tier 2 */
|
||||
case _FOR_ITER_GEN_FRAME: {
|
||||
_PyInterpreterFrame *gen_frame;
|
||||
gen_frame = sym_new_not_null(ctx);
|
||||
if (gen_frame == NULL) goto out_of_space;
|
||||
stack_pointer[0] = (_Py_UopsSymbol *)gen_frame;
|
||||
stack_pointer += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */
|
||||
|
||||
|
@ -2109,6 +2116,10 @@
|
|||
break;
|
||||
}
|
||||
|
||||
case _DYNAMIC_EXIT: {
|
||||
break;
|
||||
}
|
||||
|
||||
case _START_EXECUTOR: {
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue