GH-130296: Avoid stack transients in four instructions. (GH-130310)

* Combine _GUARD_GLOBALS_VERSION_PUSH_KEYS and _LOAD_GLOBAL_MODULE_FROM_KEYS into _LOAD_GLOBAL_MODULE

* Combine _GUARD_BUILTINS_VERSION_PUSH_KEYS and _LOAD_GLOBAL_BUILTINS_FROM_KEYS into _LOAD_GLOBAL_BUILTINS

* Combine _CHECK_ATTR_MODULE_PUSH_KEYS and _LOAD_ATTR_MODULE_FROM_KEYS into _LOAD_ATTR_MODULE

* Remove stack transient in LOAD_ATTR_WITH_HINT
This commit is contained in:
Mark Shannon 2025-02-28 18:00:38 +00:00 committed by GitHub
parent ab11c09705
commit 54965f3fb2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 615 additions and 1005 deletions

View file

@ -8173,14 +8173,14 @@
INSTRUCTION_STATS(LOAD_ATTR_MODULE);
static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
_PyStackRef owner;
PyDictKeysObject *mod_keys;
_PyStackRef attr;
_PyStackRef null = PyStackRef_NULL;
/* Skip 1 cache entry */
// _CHECK_ATTR_MODULE_PUSH_KEYS
// _LOAD_ATTR_MODULE
{
owner = stack_pointer[-1];
uint32_t dict_version = read_u32(&this_instr[2].cache);
uint16_t index = read_u16(&this_instr[4].cache);
PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
if (Py_TYPE(owner_o)->tp_getattro != PyModule_Type.tp_getattro) {
UPDATE_MISS_STATS(LOAD_ATTR);
@ -8195,16 +8195,10 @@
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
mod_keys = keys;
}
// _LOAD_ATTR_MODULE_FROM_KEYS
{
uint16_t index = read_u16(&this_instr[4].cache);
assert(mod_keys->dk_kind == DICT_KEYS_UNICODE);
assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries));
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index;
assert(keys->dk_kind == DICT_KEYS_UNICODE);
assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(keys->dk_nentries));
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(keys) + index;
PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value);
// Clear mod_keys from stack in case we need to deopt
if (attr_o == NULL) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
@ -8220,8 +8214,7 @@
}
}
#else
Py_INCREF(attr_o);
attr = PyStackRef_FromPyObjectSteal(attr_o);
attr = PyStackRef_FromPyObjectNew(attr_o);
#endif
STAT_INC(LOAD_ATTR, hit);
stack_pointer[-1] = attr;
@ -8530,7 +8523,6 @@
INSTRUCTION_STATS(LOAD_ATTR_WITH_HINT);
static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
_PyStackRef owner;
PyDictObject *dict;
_PyStackRef attr;
_PyStackRef null = PyStackRef_NULL;
/* Skip 1 cache entry */
@ -8546,22 +8538,18 @@
JUMP_TO_PREDICTED(LOAD_ATTR);
}
}
// _CHECK_ATTR_WITH_HINT
// _LOAD_ATTR_WITH_HINT
{
uint16_t hint = read_u16(&this_instr[4].cache);
PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
PyDictObject *dict_o = _PyObject_GetManagedDict(owner_o);
if (dict_o == NULL) {
PyDictObject *dict = _PyObject_GetManagedDict(owner_o);
if (dict == NULL) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
assert(PyDict_CheckExact((PyObject *)dict_o));
dict = dict_o;
}
// _LOAD_ATTR_WITH_HINT
{
uint16_t hint = read_u16(&this_instr[4].cache);
assert(PyDict_CheckExact((PyObject *)dict));
PyObject *attr_o;
if (!LOCK_OBJECT(dict)) {
if (true) {
@ -8608,13 +8596,10 @@
STAT_INC(LOAD_ATTR, hit);
attr = PyStackRef_FromPyObjectNew(attr_o);
UNLOCK_OBJECT(dict);
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyStackRef tmp = owner;
owner = attr;
stack_pointer[-1] = owner;
PyStackRef_CLOSE(tmp);
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer[-1] = attr;
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(owner);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
/* Skip 5 cache entries */
// _PUSH_NULL_CONDITIONAL
@ -9052,7 +9037,6 @@
next_instr += 5;
INSTRUCTION_STATS(LOAD_GLOBAL_BUILTIN);
static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size");
PyDictKeysObject *builtins_keys;
_PyStackRef res;
_PyStackRef null = PyStackRef_NULL;
/* Skip 1 cache entry */
@ -9073,9 +9057,10 @@
}
assert(DK_IS_UNICODE(keys));
}
// _GUARD_BUILTINS_VERSION_PUSH_KEYS
// _LOAD_GLOBAL_BUILTINS
{
uint16_t version = read_u16(&this_instr[3].cache);
uint16_t index = read_u16(&this_instr[4].cache);
PyDictObject *dict = (PyDictObject *)BUILTINS();
if (!PyDict_CheckExact(dict)) {
UPDATE_MISS_STATS(LOAD_GLOBAL);
@ -9088,13 +9073,8 @@
assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL));
JUMP_TO_PREDICTED(LOAD_GLOBAL);
}
builtins_keys = keys;
assert(DK_IS_UNICODE(builtins_keys));
}
// _LOAD_GLOBAL_BUILTINS_FROM_KEYS
{
uint16_t index = read_u16(&this_instr[4].cache);
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys);
assert(DK_IS_UNICODE(keys));
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value);
if (res_o == NULL) {
UPDATE_MISS_STATS(LOAD_GLOBAL);
@ -9109,8 +9089,7 @@
JUMP_TO_PREDICTED(LOAD_GLOBAL);
}
#else
Py_INCREF(res_o);
res = PyStackRef_FromPyObjectSteal(res_o);
res = PyStackRef_FromPyObjectNew(res_o);
#endif
STAT_INC(LOAD_GLOBAL, hit);
}
@ -9136,13 +9115,16 @@
next_instr += 5;
INSTRUCTION_STATS(LOAD_GLOBAL_MODULE);
static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size");
PyDictKeysObject *globals_keys;
_PyStackRef res;
_PyStackRef null = PyStackRef_NULL;
/* Skip 1 cache entry */
// _GUARD_GLOBALS_VERSION_PUSH_KEYS
// _NOP
{
}
// _LOAD_GLOBAL_MODULE
{
uint16_t version = read_u16(&this_instr[2].cache);
uint16_t index = read_u16(&this_instr[4].cache);
PyDictObject *dict = (PyDictObject *)GLOBALS();
if (!PyDict_CheckExact(dict)) {
UPDATE_MISS_STATS(LOAD_GLOBAL);
@ -9155,14 +9137,9 @@
assert(_PyOpcode_Deopt[opcode] == (LOAD_GLOBAL));
JUMP_TO_PREDICTED(LOAD_GLOBAL);
}
globals_keys = keys;
assert(DK_IS_UNICODE(globals_keys));
}
/* Skip 1 cache entry */
// _LOAD_GLOBAL_MODULE_FROM_KEYS
{
uint16_t index = read_u16(&this_instr[4].cache);
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys);
assert(DK_IS_UNICODE(keys));
PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys);
assert(index < DK_SIZE(keys));
PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value);
if (res_o == NULL) {
UPDATE_MISS_STATS(LOAD_GLOBAL);
@ -9177,8 +9154,7 @@
JUMP_TO_PREDICTED(LOAD_GLOBAL);
}
#else
Py_INCREF(res_o);
res = PyStackRef_FromPyObjectSteal(res_o);
res = PyStackRef_FromPyObjectNew(res_o);
#endif
STAT_INC(LOAD_GLOBAL, hit);
}