gh-115999: Enable BINARY_SUBSCR_GETITEM for free-threaded build (gh-127737)

This commit is contained in:
Donghee Na 2024-12-19 11:08:17 +09:00 committed by GitHub
parent f802c8bf87
commit 48c70b8f7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 118 additions and 62 deletions

View file

@ -865,26 +865,24 @@ dummy_func(
res = PyStackRef_FromPyObjectSteal(res_o);
}
// _BINARY_SUBSCR_CHECK_FUNC: guard uop that validates the function cached in
// the container type's _spec_cache.getitem slot before it is called.
// NOTE(review): this span looks like a rendered diff with pre-change and
// post-change lines interleaved (no +/- markers), hence the two op headers and
// the parallel `getitem` / `getitem_o` statement groups -- confirm against the
// real file before relying on it.
op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) {
op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused, getitem)) {
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
// The cast to PyHeapTypeObject below is only reached for heap types.
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
// Variant that reads the cache fields with plain loads.
PyObject *getitem = ht->_spec_cache.getitem;
DEOPT_IF(getitem == NULL);
assert(PyFunction_Check(getitem));
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
// Variant that reads the same two cache fields through the FT_ATOMIC_* load
// macros; deopt when nothing is cached.
PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
DEOPT_IF(getitem_o == NULL);
assert(PyFunction_Check(getitem_o));
// Deopt when the function's current version no longer matches the version
// recorded next to it in the cache.
uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
assert(code->co_argcount == 2);
// Deopt unless the thread has room for a frame of code->co_framesize.
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
// Wrap the checked function as a stack ref; it is the op's extra output.
getitem = PyStackRef_FromPyObjectNew(getitem_o);
STAT_INC(BINARY_SUBSCR, hit);
}
op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) {
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) {
new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
INPUTS_DEAD();

View file

@ -1125,6 +1125,7 @@
case _BINARY_SUBSCR_CHECK_FUNC: {
_PyStackRef container;
_PyStackRef getitem;
container = stack_pointer[-2];
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) {
@ -1132,42 +1133,45 @@
JUMP_TO_JUMP_TARGET();
}
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
if (getitem == NULL) {
PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
if (getitem_o == NULL) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
assert(PyFunction_Check(getitem));
uint32_t cached_version = ht->_spec_cache.getitem_version;
if (((PyFunctionObject *)getitem)->func_version != cached_version) {
assert(PyFunction_Check(getitem_o));
uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
if (((PyFunctionObject *)getitem_o)->func_version != cached_version) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
assert(code->co_argcount == 2);
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
getitem = PyStackRef_FromPyObjectNew(getitem_o);
STAT_INC(BINARY_SUBSCR, hit);
stack_pointer[0] = getitem;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Executor case for _BINARY_SUBSCR_INIT_CALL: pushes a new frame for the
// getitem function and stores `container` and `sub` as its first two locals.
// NOTE(review): this span looks like a rendered diff with pre-change and
// post-change lines interleaved (no +/- markers); the two-operand and
// three-operand stack layouts below are alternatives, not sequential code --
// confirm against the real file.
case _BINARY_SUBSCR_INIT_CALL: {
_PyStackRef getitem;
_PyStackRef sub;
_PyStackRef container;
_PyInterpreterFrame *new_frame;
// Two-operand layout: the function is re-read from the type's _spec_cache.
sub = stack_pointer[-1];
container = stack_pointer[-2];
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
// Three-operand layout: the function is taken from the top of the stack.
getitem = stack_pointer[-1];
sub = stack_pointer[-2];
container = stack_pointer[-3];
new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
frame->return_offset = 2 ;
// new_frame overwrites the lowest consumed slot, then the stack pointer is
// lowered so that slot becomes the top of the stack.
stack_pointer[-2].bits = (uintptr_t)new_frame;
stack_pointer += -1;
stack_pointer[-3].bits = (uintptr_t)new_frame;
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
break;
}

View file

@ -505,6 +505,7 @@
INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM);
static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
_PyStackRef container;
_PyStackRef getitem;
_PyStackRef sub;
_PyInterpreterFrame *new_frame;
/* Skip 1 cache entry */
@ -518,23 +519,21 @@
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
DEOPT_IF(getitem == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(getitem));
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
DEOPT_IF(getitem_o == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(getitem_o));
uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
getitem = PyStackRef_FromPyObjectNew(getitem_o);
STAT_INC(BINARY_SUBSCR, hit);
}
// _BINARY_SUBSCR_INIT_CALL
{
sub = stack_pointer[-1];
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *getitem = ht->_spec_cache.getitem;
new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
frame->return_offset = 2 ;

View file

@ -349,9 +349,10 @@ dummy_func(void) {
GETLOCAL(this_instr->operand0) = res;
}
// Optimizer-side definition of _BINARY_SUBSCR_INIT_CALL: every input is
// explicitly ignored, no abstract frame is produced (new_frame is NULL) and
// ctx->done is set to true.
// NOTE(review): the two op headers look like the pre-change and post-change
// signatures from a rendered diff (the second adds the `getitem` input) --
// confirm against the real file.
op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) {
op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _Py_UOpsAbstractFrame *)) {
// The (void) casts only mark the inputs as used.
(void)container;
(void)sub;
(void)getitem;
new_frame = NULL;
ctx->done = true;
}

View file

@ -592,21 +592,29 @@
}
// Optimizer case for _BINARY_SUBSCR_CHECK_FUNC: models the uop as pushing one
// additional value, `getitem`, represented by a fresh not-null symbol.
case _BINARY_SUBSCR_CHECK_FUNC: {
_Py_UopsSymbol *getitem;
getitem = sym_new_not_null(ctx);
// Write the symbol into the next free slot, then grow the stack by one.
stack_pointer[0] = getitem;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Optimizer case for _BINARY_SUBSCR_INIT_CALL: reads the operands, ignores
// them, stores a NULL abstract frame as the result and sets ctx->done.
// NOTE(review): this span looks like a rendered diff with pre-change and
// post-change lines interleaved (no +/- markers); the two-operand and
// three-operand stack layouts below are alternatives, not sequential code --
// confirm against the real file.
case _BINARY_SUBSCR_INIT_CALL: {
_Py_UopsSymbol *getitem;
_Py_UopsSymbol *sub;
_Py_UopsSymbol *container;
_Py_UOpsAbstractFrame *new_frame;
// Two-operand layout (container, sub).
sub = stack_pointer[-1];
container = stack_pointer[-2];
// Three-operand layout (container, sub, getitem).
getitem = stack_pointer[-1];
sub = stack_pointer[-2];
container = stack_pointer[-3];
(void)container;
(void)sub;
(void)getitem;
new_frame = NULL;
ctx->done = true;
// The result overwrites the lowest consumed slot, then the stack pointer is
// lowered so that slot becomes the top of the stack.
stack_pointer[-2] = (_Py_UopsSymbol *)new_frame;
stack_pointer += -1;
stack_pointer[-3] = (_Py_UopsSymbol *)new_frame;
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
break;
}

View file

@ -1096,6 +1096,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METHOD);
return -1;
}
/* Don't specialize if PEP 523 is active */
if (_PyInterpreterState_GET()->eval_frame) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER);
return -1;
@ -1165,6 +1166,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
if (version == 0) {
return -1;
}
/* Don't specialize if PEP 523 is active */
if (_PyInterpreterState_GET()->eval_frame) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER);
return -1;
@ -1781,12 +1783,12 @@ _Py_Specialize_BinarySubscr(
specialized_op = BINARY_SUBSCR_DICT;
goto success;
}
#ifndef Py_GIL_DISABLED
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
unsigned int tp_version;
PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &tp_version);
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
Py_DECREF(descriptor);
goto fail;
}
PyFunctionObject *func = (PyFunctionObject *)descriptor;
@ -1794,30 +1796,29 @@ _Py_Specialize_BinarySubscr(
int kind = function_kind(fcode);
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
Py_DECREF(descriptor);
goto fail;
}
if (fcode->co_argcount != 2) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
Py_DECREF(descriptor);
goto fail;
}
uint32_t version = _PyFunction_GetVersionForCurrentState(func);
if (!_PyFunction_IsVersionValid(version)) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
/* Don't specialize if PEP 523 is active */
if (_PyInterpreterState_GET()->eval_frame) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER);
Py_DECREF(descriptor);
goto fail;
}
PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
// This pointer is invalidated by PyType_Modified (see the comment on
// struct _specialization_cache):
ht->_spec_cache.getitem = descriptor;
ht->_spec_cache.getitem_version = version;
specialized_op = BINARY_SUBSCR_GETITEM;
goto success;
if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)tp_version)) {
specialized_op = BINARY_SUBSCR_GETITEM;
Py_DECREF(descriptor);
goto success;
}
}
#endif // Py_GIL_DISABLED
Py_XDECREF(descriptor);
SPECIALIZATION_FAIL(BINARY_SUBSCR,
binary_subscr_fail_kind(container_type, sub));
fail:
@ -2617,6 +2618,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg)
assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR ||
instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR
);
/* Don't specialize if PEP 523 is active */
if (_PyInterpreterState_GET()->eval_frame) {
SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER);
goto failure;
@ -2645,6 +2647,7 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr)
assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND);
PyTypeObject *tp = Py_TYPE(receiver);
if (tp == &PyGen_Type || tp == &PyCoro_Type) {
/* Don't specialize if PEP 523 is active */
if (_PyInterpreterState_GET()->eval_frame) {
SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER);
goto failure;