GH-118095: Add dynamic exit support and FOR_ITER_GEN support to tier 2 (GH-118279)

This commit is contained in:
Mark Shannon 2024-04-26 18:08:50 +01:00 committed by GitHub
parent 63add11704
commit 3e06c7f719
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 315 additions and 139 deletions

View file

@ -1109,6 +1109,10 @@ dummy_func(
_PyFrame_StackPush(frame, retval);
/* We don't know which of these is relevant here, so keep them equal */
assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
goto resume_frame;
}
@ -2759,24 +2763,26 @@ dummy_func(
_ITER_JUMP_RANGE +
_ITER_NEXT_RANGE;
inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
DEOPT_IF(tstate->interp->eval_frame);
op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) {
PyGenObject *gen = (PyGenObject *)iter;
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
assert(next_instr - this_instr + oparg <= UINT16_MAX);
frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
DISPATCH_INLINED(gen_frame);
// oparg is the return offset from the next instruction.
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
}
macro(FOR_ITER_GEN) =
unused/1 +
_CHECK_PEP_523 +
_FOR_ITER_GEN_FRAME +
_PUSH_FRAME;
inst(BEFORE_ASYNC_WITH, (mgr -- exit, res)) {
PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__));
if (enter == NULL) {
@ -3166,10 +3172,7 @@ dummy_func(
}
}
// The 'unused' output effect represents the return value
// (which will be pushed when the frame returns).
// It is needed so CALL_PY_EXACT_ARGS matches its family.
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) {
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) {
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
@ -4189,6 +4192,38 @@ dummy_func(
GOTO_TIER_TWO(executor);
}
tier2 op(_DYNAMIC_EXIT, (--)) {
tstate->previous_executor = (PyObject *)current_executor;
_PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg];
_Py_CODEUNIT *target = frame->instr_ptr;
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
}
else {
if (!backoff_counter_triggers(exit->temperature)) {
exit->temperature = advance_backoff_counter(exit->temperature);
GOTO_TIER_ONE(target);
}
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
exit->temperature = restart_backoff_counter(exit->temperature);
if (optimized < 0) {
Py_DECREF(current_executor);
tstate->previous_executor = Py_None;
GOTO_UNWIND();
}
GOTO_TIER_ONE(target);
}
else {
exit->temperature = initial_temperature_backoff_counter();
}
}
GOTO_TIER_TWO(executor);
}
tier2 op(_START_EXECUTOR, (executor/4 --)) {
Py_DECREF(tstate->previous_executor);
tstate->previous_executor = NULL;
@ -4222,6 +4257,7 @@ dummy_func(
GOTO_UNWIND();
}
// END BYTECODES //
}

View file

@ -1072,9 +1072,13 @@ jump_to_jump_target:
next_uop = current_executor->trace + target;
goto tier2_dispatch;
exit_to_tier1_dynamic:
next_instr = frame->instr_ptr;
goto goto_to_tier1;
exit_to_tier1:
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
goto_to_tier1:
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("DEOPT: [UOp ");

View file

@ -442,3 +442,4 @@ do { \
#define GOTO_UNWIND() goto error_tier_two
#define EXIT_TO_TRACE() goto exit_to_trace
#define EXIT_TO_TIER1() goto exit_to_tier1
#define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic;

View file

@ -2769,7 +2769,32 @@
break;
}
/* _FOR_ITER_GEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
case _FOR_ITER_GEN_FRAME: {
PyObject *iter;
_PyInterpreterFrame *gen_frame;
oparg = CURRENT_OPARG();
iter = stack_pointer[-1];
PyGenObject *gen = (PyGenObject *)iter;
if (Py_TYPE(gen) != &PyGen_Type) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
if (gen->gi_frame_state >= FRAME_EXECUTING) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
STAT_INC(FOR_ITER, hit);
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
// oparg is the return offset from the next instruction.
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
stack_pointer[0] = (PyObject *)gen_frame;
stack_pointer += 1;
break;
}
/* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 because it has both popping and not-popping errors */
@ -4187,6 +4212,40 @@
break;
}
case _DYNAMIC_EXIT: {
oparg = CURRENT_OPARG();
tstate->previous_executor = (PyObject *)current_executor;
_PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg];
_Py_CODEUNIT *target = frame->instr_ptr;
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
}
else {
if (!backoff_counter_triggers(exit->temperature)) {
exit->temperature = advance_backoff_counter(exit->temperature);
GOTO_TIER_ONE(target);
}
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
exit->temperature = restart_backoff_counter(exit->temperature);
if (optimized < 0) {
Py_DECREF(current_executor);
tstate->previous_executor = Py_None;
GOTO_UNWIND();
}
GOTO_TIER_ONE(target);
}
else {
exit->temperature = initial_temperature_backoff_counter();
}
}
GOTO_TIER_TWO(executor);
break;
}
case _START_EXECUTOR: {
PyObject *executor = (PyObject *)CURRENT_OPERAND();
Py_DECREF(tstate->previous_executor);

View file

@ -2631,28 +2631,49 @@
}
TARGET(FOR_ITER_GEN) {
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(FOR_ITER_GEN);
static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size");
PyObject *iter;
_PyInterpreterFrame *gen_frame;
_PyInterpreterFrame *new_frame;
/* Skip 1 cache entry */
// _CHECK_PEP_523
{
DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
}
// _FOR_ITER_GEN_FRAME
iter = stack_pointer[-1];
DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
PyGenObject *gen = (PyGenObject *)iter;
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
assert(next_instr - this_instr + oparg <= UINT16_MAX);
frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
DISPATCH_INLINED(gen_frame);
{
PyGenObject *gen = (PyGenObject *)iter;
DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
// oparg is the return offset from the next instruction.
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
}
// _PUSH_FRAME
new_frame = gen_frame;
{
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
TARGET(FOR_ITER_LIST) {
@ -6011,6 +6032,10 @@
_PyFrame_StackPush(frame, retval);
/* We don't know which of these is relevant here, so keep them equal */
assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
_PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
goto resume_frame;
}

View file

@ -567,8 +567,6 @@ translate_bytecode_to_trace(
top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
// Need space for _DEOPT
max_length--;
@ -597,6 +595,8 @@ top: // Jump here after _PUSH_FRAME or likely branches
}
}
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
/* Special case the first instruction,
* so that we can guarantee forward progress */
@ -814,6 +814,12 @@ top: // Jump here after _PUSH_FRAME or likely branches
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
goto done;
}
if (opcode == FOR_ITER_GEN) {
DPRINTF(2, "Bailing due to dynamic target\n");
ADD_TO_TRACE(uop, oparg, 0, target);
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
goto done;
}
// Increment IP to the return address
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
TRACE_STACK_PUSH();
@ -847,7 +853,7 @@ top: // Jump here after _PUSH_FRAME or likely branches
}
DPRINTF(2, "Bail, new_code == NULL\n");
ADD_TO_TRACE(uop, oparg, 0, target);
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
goto done;
}
@ -917,7 +923,7 @@ count_exits(_PyUOpInstruction *buffer, int length)
int exit_count = 0;
for (int i = 0; i < length; i++) {
int opcode = buffer[i].opcode;
if (opcode == _SIDE_EXIT) {
if (opcode == _SIDE_EXIT || opcode == _DYNAMIC_EXIT) {
exit_count++;
}
}
@ -1114,6 +1120,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
dest->format = UOP_FORMAT_EXIT;
next_exit--;
}
if (opcode == _DYNAMIC_EXIT) {
executor->exits[next_exit].target = 0;
dest->oparg = next_exit;
next_exit--;
}
}
assert(next_exit == -1);
assert(dest == executor->trace);

View file

@ -1439,7 +1439,14 @@
break;
}
/* _FOR_ITER_GEN is not a viable micro-op for tier 2 */
case _FOR_ITER_GEN_FRAME: {
_PyInterpreterFrame *gen_frame;
gen_frame = sym_new_not_null(ctx);
if (gen_frame == NULL) goto out_of_space;
stack_pointer[0] = (_Py_UopsSymbol *)gen_frame;
stack_pointer += 1;
break;
}
/* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */
@ -2109,6 +2116,10 @@
break;
}
case _DYNAMIC_EXIT: {
break;
}
case _START_EXECUTOR: {
break;
}