GH-118095: Add tier two support for BINARY_SUBSCR_GETITEM (GH-120793)

This commit is contained in:
Mark Shannon 2024-08-02 00:19:05 +01:00 committed by GitHub
parent fda6bd842a
commit df13a1821a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 317 additions and 207 deletions

View file

@ -765,32 +765,40 @@ dummy_func(
res = PyStackRef_FromPyObjectSteal(res_o);
}
inst(BINARY_SUBSCR_GETITEM, (unused/1, container_st, sub_st -- unused)) {
PyObject *container = PyStackRef_AsPyObjectBorrow(container_st);
DEOPT_IF(tstate->interp->eval_frame);
PyTypeObject *tp = Py_TYPE(container);
op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) {
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *cached = ht->_spec_cache.getitem;
DEOPT_IF(cached == NULL);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
PyObject *getitem = ht->_spec_cache.getitem;
DEOPT_IF(getitem == NULL);
assert(PyFunction_Check(getitem));
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(getitem->func_version != cached_version);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
STAT_INC(BINARY_SUBSCR, hit);
Py_INCREF(getitem);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2);
STACK_SHRINK(2);
new_frame->localsplus[0] = container_st;
new_frame->localsplus[1] = sub_st;
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
}
// Micro-op: build the frame for a call to the type's cached __getitem__ function.
// Stack effect: consumes `container` and `sub`, produces `new_frame`.
// No type or version checks are done here; the macro BINARY_SUBSCR_GETITEM places
// _BINARY_SUBSCR_CHECK_FUNC immediately before this op, which performs them.
op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) {
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
// Unchecked cast: presumably safe only because the heap-type check already ran
// in _BINARY_SUBSCR_CHECK_FUNC -- confirm before reusing this op elsewhere.
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
// Re-read the cached function from the type's specialization cache.
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2);
SYNC_SP();
// The two operands become the first two locals of the new frame.
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
// Return offset is the instruction itself plus its inline cache entries, written
// as a constant rather than computed from `this_instr`.
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
}
// BINARY_SUBSCR_GETITEM is defined as a sequence of parts, listed in order:
// one skipped cache entry, the PEP 523 guard, the guard on the cached
// __getitem__ function, frame construction, and finally the frame push.
macro(BINARY_SUBSCR_GETITEM) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_BINARY_SUBSCR_CHECK_FUNC +
_BINARY_SUBSCR_INIT_CALL +
_PUSH_FRAME;
inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) {
ERROR_IF(_PyList_AppendTakeRef((PyListObject *)PyStackRef_AsPyObjectBorrow(list),
PyStackRef_AsPyObjectSteal(v)) < 0, error);

View file

@ -966,7 +966,57 @@
break;
}
/* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
/* Guard uop: checks that the container's type has a usable cached __getitem__
 * function. Reads stack_pointer[-2] and leaves the stack pointer unchanged.
 * Each failed check records a miss and jumps to the jump target. */
case _BINARY_SUBSCR_CHECK_FUNC: {
_PyStackRef container;
container = stack_pointer[-2];
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
/* Only heap types are accepted; the cast to PyHeapTypeObject below depends on it. */
if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
/* No function cached for this type. */
if (getitem == NULL) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
assert(PyFunction_Check(getitem));
/* The function's current version must match the version stored in the cache. */
uint32_t cached_version = ht->_spec_cache.getitem_version;
if (((PyFunctionObject *)getitem)->func_version != cached_version) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
assert(code->co_argcount == 2);
/* There must be room for the callee's frame (co_framesize) before it is pushed. */
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
STAT_INC(BINARY_SUBSCR, hit);
/* NOTE(review): this reference is presumably the one owned by the frame that
 * _BINARY_SUBSCR_INIT_CALL pushes next -- confirm against _PyFrame_PushUnchecked. */
Py_INCREF(getitem);
break;
}
/* Builds the frame for the cached __getitem__ call: pops `container` and `sub`
 * from the stack and pushes the new frame pointer in their place.
 * Performs no checks of its own. */
case _BINARY_SUBSCR_INIT_CALL: {
_PyStackRef sub;
_PyStackRef container;
_PyInterpreterFrame *new_frame;
sub = stack_pointer[-1];
container = stack_pointer[-2];
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
/* Unchecked cast; presumably valid because _BINARY_SUBSCR_CHECK_FUNC ran first -- confirm. */
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2);
/* Drop both operands from the caller's stack. */
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
/* The operands become the first two locals of the new frame. */
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
/* The raw frame pointer is stored in the stack slot's bits field. */
stack_pointer[0].bits = (uintptr_t)new_frame;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _LIST_APPEND: {
_PyStackRef v;

View file

@ -469,37 +469,63 @@
}
TARGET(BINARY_SUBSCR_GETITEM) {
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM);
static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
_PyStackRef container_st;
_PyStackRef sub_st;
_PyStackRef container;
_PyStackRef sub;
_PyInterpreterFrame *new_frame;
/* Skip 1 cache entry */
sub_st = stack_pointer[-1];
container_st = stack_pointer[-2];
PyObject *container = PyStackRef_AsPyObjectBorrow(container_st);
DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR);
PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *cached = ht->_spec_cache.getitem;
DEOPT_IF(cached == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
Py_INCREF(getitem);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2);
STACK_SHRINK(2);
new_frame->localsplus[0] = container_st;
new_frame->localsplus[1] = sub_st;
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
// _CHECK_PEP_523
{
DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR);
}
// _BINARY_SUBSCR_CHECK_FUNC
container = stack_pointer[-2];
{
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
DEOPT_IF(getitem == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(getitem));
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
Py_INCREF(getitem);
}
// _BINARY_SUBSCR_INIT_CALL
sub = stack_pointer[-1];
{
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2);
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
}
// _PUSH_FRAME
{
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
TARGET(BINARY_SUBSCR_LIST_INT) {

View file

@ -795,6 +795,7 @@ translate_bytecode_to_trace(
assert(i + 1 == nuops);
if (opcode == FOR_ITER_GEN ||
opcode == LOAD_ATTR_PROPERTY ||
opcode == BINARY_SUBSCR_GETITEM ||
opcode == SEND_GEN)
{
DPRINTF(2, "Bailing due to dynamic target\n");
@ -921,7 +922,9 @@ done:
2 * INSTR_IP(initial_instr, code));
return 0;
}
if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
if (!is_terminator(&trace[trace_length-1])) {
/* Allow space for _EXIT_TRACE */
max_length += 2;
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
}
DPRINTF(1,
@ -1102,7 +1105,7 @@ sanity_check(_PyExecutorObject *executor)
CHECK(inst->format == UOP_FORMAT_JUMP);
CHECK(inst->error_target < executor->code_size);
}
if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
if (is_terminator(inst)) {
ended = true;
i++;
break;
@ -1207,8 +1210,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length)
if (opcode == _NOP) {
nop_count++;
}
if (opcode == _EXIT_TRACE ||
opcode == _JUMP_TO_TOP) {
if (is_terminator(&buffer[i])) {
return i+1-nop_count;
}
}
@ -1257,7 +1259,7 @@ uop_optimize(
else if (oparg < _PyUop_Replication[opcode]) {
buffer[pc].opcode = opcode + oparg + 1;
}
else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
else if (is_terminator(&buffer[pc])) {
break;
}
assert(_PyOpcode_uop_name[buffer[pc].opcode]);

View file

@ -52,14 +52,6 @@
#define DPRINTF(level, ...)
#endif
/* Return true if `opcode` is _EXIT_TRACE or _JUMP_TO_TOP, false otherwise. */
static inline bool
op_is_end(uint32_t opcode)
{
return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
}
static int
get_mutations(PyObject* dict) {
assert(PyDict_CheckExact(dict));
@ -288,7 +280,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
prechecked_function_version = (uint32_t)buffer[pc].operand;
break;
default:
if (op_is_end(opcode)) {
if (is_terminator(inst)) {
return 1;
}
break;
@ -552,6 +544,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
}
case _JUMP_TO_TOP:
case _EXIT_TRACE:
case _DYNAMIC_EXIT:
return pc + 1;
default:
{

View file

@ -539,7 +539,18 @@
break;
}
/* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */
/* Empty case body: nothing on the stack is read or written for this uop here. */
case _BINARY_SUBSCR_CHECK_FUNC: {
break;
}
/* Replaces the two input stack entries with one fresh not-null symbol that
 * stands in for the new frame. Net stack effect is -1. */
case _BINARY_SUBSCR_INIT_CALL: {
_PyInterpreterFrame *new_frame;
/* NOTE(review): the result of sym_new_not_null() is held in a
 * _PyInterpreterFrame* local and cast to _Py_UopsSymbol* on the next line;
 * the types look mismatched -- presumably a code-generation artifact, confirm. */
new_frame = sym_new_not_null(ctx);
stack_pointer[-2] = (_Py_UopsSymbol *)new_frame;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _LIST_APPEND: {
stack_pointer += -1;