gh-130704: Strength reduce LOAD_FAST{_LOAD_FAST} (#130708)

Optimize `LOAD_FAST` opcodes into faster versions that load borrowed references onto the operand stack when we can prove that the lifetime of the local outlives the lifetime of the temporary that is loaded onto the stack.
This commit is contained in:
mpage 2025-04-01 10:18:42 -07:00 committed by GitHub
parent e9556e1004
commit 053c285f6b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 1282 additions and 345 deletions

View file

@ -340,9 +340,8 @@
// _BINARY_OP_INPLACE_ADD_UNICODE
{
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
assert(PyUnicode_CheckExact(left_o));
assert(PyUnicode_CheckExact(right_o));
assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right)));
int next_oparg;
#if TIER_ONE
assert(next_instr->op.code == STORE_FAST);
@ -369,9 +368,10 @@
* only the locals reference, so PyUnicode_Append knows
* that the string is safe to mutate.
*/
assert(Py_REFCNT(left_o) >= 2);
assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -7291,8 +7291,7 @@
{
retval = val;
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
_PyStackRef temp = retval;
assert(PyStackRef_IsHeapSafe(temp));
_PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -7376,7 +7375,7 @@
#endif
stack_pointer = _PyFrame_GetStackPointer(frame);
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
value = temp;
value = PyStackRef_MakeHeapSafe(temp);
LLTRACE_RESUME_FRAME();
}
stack_pointer[0] = value;
@ -8873,6 +8872,44 @@
DISPATCH();
}
TARGET(LOAD_FAST_BORROW) {
#if Py_TAIL_CALL_INTERP
int opcode = LOAD_FAST_BORROW;
(void)(opcode);
#endif
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(LOAD_FAST_BORROW);
_PyStackRef value;
assert(!PyStackRef_IsNull(GETLOCAL(oparg)));
value = PyStackRef_Borrow(GETLOCAL(oparg));
stack_pointer[0] = value;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
DISPATCH();
}
TARGET(LOAD_FAST_BORROW_LOAD_FAST_BORROW) {
#if Py_TAIL_CALL_INTERP
int opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
(void)(opcode);
#endif
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(LOAD_FAST_BORROW_LOAD_FAST_BORROW);
_PyStackRef value1;
_PyStackRef value2;
uint32_t oparg1 = oparg >> 4;
uint32_t oparg2 = oparg & 15;
value1 = PyStackRef_Borrow(GETLOCAL(oparg1));
value2 = PyStackRef_Borrow(GETLOCAL(oparg2));
stack_pointer[0] = value1;
stack_pointer[1] = value2;
stack_pointer += 2;
assert(WITHIN_STACK_BOUNDS());
DISPATCH();
}
TARGET(LOAD_FAST_CHECK) {
#if Py_TAIL_CALL_INTERP
int opcode = LOAD_FAST_CHECK;
@ -10346,8 +10383,7 @@
_PyStackRef res;
retval = stack_pointer[-1];
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
_PyStackRef temp = retval;
assert(PyStackRef_IsHeapSafe(temp));
_PyStackRef temp = PyStackRef_MakeHeapSafe(retval);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -10411,7 +10447,7 @@
PyGenObject *gen = (PyGenObject *)receiver_o;
_PyInterpreterFrame *gen_frame = &gen->gi_iframe;
STACK_SHRINK(1);
_PyFrame_StackPush(gen_frame, v);
_PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
@ -10512,7 +10548,7 @@
}
STAT_INC(SEND, hit);
gen_frame = &gen->gi_iframe;
_PyFrame_StackPush(gen_frame, v);
_PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v));
gen->gi_frame_state = FRAME_EXECUTING;
gen->gi_exc_state.previous_item = tstate->exc_info;
tstate->exc_info = &gen->gi_exc_state;
@ -12033,7 +12069,7 @@
#endif
stack_pointer = _PyFrame_GetStackPointer(frame);
LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
value = temp;
value = PyStackRef_MakeHeapSafe(temp);
LLTRACE_RESUME_FRAME();
stack_pointer[0] = value;
stack_pointer += 1;