gh-106581: Split CALL_PY_EXACT_ARGS into uops (#107760)

* Split `CALL_PY_EXACT_ARGS` into uops

This is only the first step for doing `CALL` in Tier 2.
The next step involves tracing into the called code object and back.
After that we'll have to do the remaining `CALL` specialization.
Finally we'll have to deal with `KW_NAMES`.

Note: this moves setting `frame->return_offset` directly in front of
`DISPATCH_INLINED()`, to make it easier to move it into `_PUSH_FRAME`.
This commit is contained in:
Guido van Rossum 2023-08-16 16:26:43 -07:00 committed by GitHub
parent 665a4391e1
commit dc8fdf5fd5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 412 additions and 116 deletions

View file

@ -956,13 +956,13 @@ dummy_func(
{
PyGenObject *gen = (PyGenObject *)receiver;
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
if (Py_IsNone(v) && PyIter_Check(receiver)) {
@ -995,13 +995,13 @@ dummy_func(
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND);
STAT_INC(SEND, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
SKIP_OVER(INLINE_CACHE_ENTRIES_SEND);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
@ -2587,7 +2587,6 @@ dummy_func(
DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
STAT_INC(FOR_ITER, hit);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
frame->return_offset = oparg;
_PyFrame_StackPush(gen_frame, Py_None);
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
@ -2595,6 +2594,7 @@ dummy_func(
SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER);
assert(next_instr[oparg].op.code == END_FOR ||
next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
frame->return_offset = oparg;
DISPATCH_INLINED(gen_frame);
}
@ -2949,32 +2949,72 @@ dummy_func(
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
}
inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
ASSERT_KWNAMES_IS_NULL();
op(_CHECK_PEP_523, (--)) {
DEOPT_IF(tstate->interp->eval_frame, CALL);
}
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
}
op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) {
PyFunctionObject *func = (PyFunctionObject *)callable;
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
}
op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) {
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(code->co_argcount != argcount, CALL);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
STAT_INC(CALL, hit);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
// Manipulate stack directly since we leave using DISPATCH_INLINED().
STACK_SHRINK(oparg + 2);
SKIP_OVER(INLINE_CACHE_ENTRIES_CALL);
frame->return_offset = 0;
DISPATCH_INLINED(new_frame);
}
// The 'unused' output effect represents the return value
// (which will be pushed when the frame returns).
// It is needed so CALL_PY_EXACT_ARGS matches its family.
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
frame->return_offset = 0;
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
#if TIER_ONE
frame = cframe.current_frame = new_frame;
goto start_frame;
#endif
#if TIER_TWO
frame = tstate->cframe->current_frame = new_frame;
ERROR_IF(_Py_EnterRecursivePy(tstate), exit_unwind);
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}
macro(CALL_PY_EXACT_ARGS) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_FUNCTION_EXACT_ARGS +
_CHECK_STACK_SPACE +
_INIT_CALL_PY_EXACT_ARGS +
SAVE_IP + // Tier 2 only; special-cased oparg
SAVE_CURRENT_IP + // Sets frame->prev_instr
_PUSH_FRAME;
inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) {
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(tstate->interp->eval_frame, CALL);
@ -3735,6 +3775,16 @@ dummy_func(
frame->prev_instr = ip_offset + oparg;
}
op(SAVE_CURRENT_IP, (--)) {
#if TIER_ONE
frame->prev_instr = next_instr - 1;
#endif
#if TIER_TWO
// Relies on a preceding SAVE_IP
frame->prev_instr--;
#endif
}
op(EXIT_TRACE, (--)) {
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);