mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
gh-115999: Refactor LOAD_GLOBAL
specializations to avoid reloading {globals, builtins} keys (gh-124953)
Each of the `LOAD_GLOBAL` specializations is implemented roughly as:
1. Load keys version.
2. Load cached keys version.
3. Deopt if (1) and (2) don't match.
4. Load keys.
5. Load cached index into keys.
6. Load object from (4) at offset from (5).

This is not thread-safe in free-threaded builds: the keys object may be replaced between steps (3) and (4). This change refactors the specializations to avoid reloading the keys object; instead, the guards pass the keys object on to be consumed by downstream uops.
This commit is contained in:
parent
b9a8ca0a6a
commit
f978fb4f8d
9 changed files with 379 additions and 162 deletions
|
@ -131,6 +131,26 @@ incorrect_keys(_PyUOpInstruction *inst, PyObject *obj)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
check_next_uop(_PyUOpInstruction *buffer, int size, int pc, uint16_t expected)
|
||||
{
|
||||
if (pc + 1 >= size) {
|
||||
DPRINTF(1, "Cannot rewrite %s at pc %d: buffer too small\n",
|
||||
_PyOpcode_uop_name[buffer[pc].opcode], pc);
|
||||
return 0;
|
||||
}
|
||||
uint16_t next_opcode = buffer[pc + 1].opcode;
|
||||
if (next_opcode != expected) {
|
||||
DPRINTF(1,
|
||||
"Cannot rewrite %s at pc %d: unexpected next opcode %s, "
|
||||
"expected %s\n",
|
||||
_PyOpcode_uop_name[buffer[pc].opcode], pc,
|
||||
_PyOpcode_uop_name[next_opcode], _PyOpcode_uop_name[expected]);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Returns 1 if successfully optimized
|
||||
* 0 if the trace is not suitable for optimization (yet)
|
||||
* -1 if there was an error. */
|
||||
|
@ -174,7 +194,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
_PyUOpInstruction *inst = &buffer[pc];
|
||||
int opcode = inst->opcode;
|
||||
switch(opcode) {
|
||||
case _GUARD_BUILTINS_VERSION:
|
||||
case _GUARD_BUILTINS_VERSION_PUSH_KEYS:
|
||||
if (incorrect_keys(inst, builtins)) {
|
||||
OPT_STAT_INC(remove_globals_incorrect_keys);
|
||||
return 0;
|
||||
|
@ -182,6 +202,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) {
|
||||
continue;
|
||||
}
|
||||
if (!check_next_uop(buffer, buffer_size, pc,
|
||||
_LOAD_GLOBAL_BUILTINS_FROM_KEYS)) {
|
||||
continue;
|
||||
}
|
||||
if ((builtins_watched & 1) == 0) {
|
||||
PyDict_Watch(BUILTINS_WATCHER_ID, builtins);
|
||||
builtins_watched |= 1;
|
||||
|
@ -194,8 +218,13 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
buffer[pc].operand = function_version;
|
||||
function_checked |= 1;
|
||||
}
|
||||
// We're no longer pushing the builtins keys; rewrite the
|
||||
// instruction that consumed the keys to load them from the
|
||||
// frame.
|
||||
buffer[pc + 1].opcode = _LOAD_GLOBAL_BUILTINS;
|
||||
break;
|
||||
case _GUARD_GLOBALS_VERSION:
|
||||
case _GUARD_GLOBALS_VERSION_PUSH_KEYS:
|
||||
if (incorrect_keys(inst, globals)) {
|
||||
OPT_STAT_INC(remove_globals_incorrect_keys);
|
||||
return 0;
|
||||
|
@ -204,6 +233,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) {
|
||||
continue;
|
||||
}
|
||||
if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS &&
|
||||
!check_next_uop(buffer, buffer_size, pc,
|
||||
_LOAD_GLOBAL_MODULE_FROM_KEYS)) {
|
||||
continue;
|
||||
}
|
||||
if ((globals_watched & 1) == 0) {
|
||||
PyDict_Watch(GLOBALS_WATCHER_ID, globals);
|
||||
_Py_BloomFilter_Add(dependencies, globals);
|
||||
|
@ -217,6 +251,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
buffer[pc].operand = function_version;
|
||||
function_checked |= 1;
|
||||
}
|
||||
if (opcode == _GUARD_GLOBALS_VERSION_PUSH_KEYS) {
|
||||
// We're no longer pushing the globals keys; rewrite the
|
||||
// instruction that consumed the keys to load them from the
|
||||
// frame.
|
||||
buffer[pc + 1].opcode = _LOAD_GLOBAL_MODULE;
|
||||
}
|
||||
break;
|
||||
case _LOAD_GLOBAL_BUILTINS:
|
||||
if (function_checked & globals_watched & builtins_watched & 1) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue