GH-105229: Replace some superinstructions with single instruction equivalent. (GH-105230)

This commit is contained in:
Mark Shannon 2023-06-05 11:07:04 +01:00 committed by GitHub
parent e8ecb9ee6b
commit 0689340366
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 731 additions and 686 deletions

View file

@ -201,6 +201,15 @@ dummy_func(
GETLOCAL(oparg) = NULL;
}
// Push two local variables with a single instruction.
// oparg carries both local indices: the bits above the low four
// (oparg >> 4) select value1 and the low four bits (oparg & 15)
// select value2.
inst(LOAD_FAST_LOAD_FAST, ( -- value1, value2)) {
uint32_t oparg1 = oparg >> 4;
uint32_t oparg2 = oparg & 15;
value1 = GETLOCAL(oparg1);
value2 = GETLOCAL(oparg2);
// Both values are outputs of this instruction, so each gets its own
// additional reference.
Py_INCREF(value1);
Py_INCREF(value2);
}
inst(LOAD_CONST, (-- value)) {
value = GETITEM(frame->f_code->co_consts, oparg);
Py_INCREF(value);
@ -210,10 +219,22 @@ dummy_func(
SETLOCAL(oparg, value);
}
super(LOAD_FAST__LOAD_FAST) = LOAD_FAST + LOAD_FAST;
// Store the input value into one local, then push another local, in a
// single instruction. oparg carries both local indices: oparg >> 4 is
// the store target and oparg & 15 is the local that gets loaded.
inst(STORE_FAST_LOAD_FAST, (value1 -- value2)) {
uint32_t oparg1 = oparg >> 4;
uint32_t oparg2 = oparg & 15;
// The store is issued before the load, matching the order of the
// two operations this instruction combines.
SETLOCAL(oparg1, value1);
value2 = GETLOCAL(oparg2);
// value2 is an output of this instruction, so take a reference for it.
Py_INCREF(value2);
}
// Store two input values into two locals with a single instruction.
// oparg carries both local indices: oparg >> 4 receives value1 and
// oparg & 15 receives value2.
inst(STORE_FAST_STORE_FAST, (value2, value1 --)) {
uint32_t oparg1 = oparg >> 4;
uint32_t oparg2 = oparg & 15;
// value1 (listed last in the stack effect) is stored first, then value2.
SETLOCAL(oparg1, value1);
SETLOCAL(oparg2, value2);
}
super(LOAD_FAST__LOAD_CONST) = LOAD_FAST + LOAD_CONST;
super(STORE_FAST__LOAD_FAST) = STORE_FAST + LOAD_FAST;
super(STORE_FAST__STORE_FAST) = STORE_FAST + STORE_FAST;
super(LOAD_CONST__LOAD_FAST) = LOAD_CONST + LOAD_FAST;
inst(POP_TOP, (value --)) {
@ -386,8 +407,7 @@ dummy_func(
// At the end we just skip over the STORE_FAST.
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) {
_Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP];
assert(true_next.op.code == STORE_FAST ||
true_next.op.code == STORE_FAST__LOAD_FAST);
assert(true_next.op.code == STORE_FAST);
PyObject **target_local = &GETLOCAL(true_next.op.arg);
DEOPT_IF(*target_local != left, BINARY_OP);
STAT_INC(BINARY_OP, hit);
@ -3484,5 +3504,3 @@ dummy_func(
}
// Future families go below this point //
family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST };

View file

@ -1586,6 +1586,56 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
return SUCCESS;
}
/* Try to fuse two instructions into the single instruction super_op.
 *
 * On success inst1 is rewritten to super_op with a combined oparg
 * (inst1's oparg shifted left by four, OR-ed with inst2's oparg) and
 * inst2 is turned into a NOP.
 *
 * Nothing is changed when:
 *   - both instructions have a non-negative line number and the two
 *     numbers differ, or
 *   - either oparg is 16 or greater, i.e. does not fit in four bits.
 */
static void
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
{
    int32_t first_line = inst1->i_loc.lineno;
    int32_t second_line = inst2->i_loc.lineno;

    int on_different_lines = (first_line >= 0 && second_line >= 0
                              && first_line != second_line);
    if (on_different_lines) {
        return;
    }

    int opargs_fit = (inst1->i_oparg < 16 && inst2->i_oparg < 16);
    if (!opargs_fit) {
        return;
    }

    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
    INSTR_SET_OP0(inst2, NOP);
}
/* Scan every basic block for adjacent LOAD_FAST/STORE_FAST pairs and try
 * to fuse each pair into one combined instruction:
 *
 *   LOAD_FAST  + LOAD_FAST  -> LOAD_FAST_LOAD_FAST
 *   STORE_FAST + LOAD_FAST  -> STORE_FAST_LOAD_FAST
 *   STORE_FAST + STORE_FAST -> STORE_FAST_STORE_FAST
 *
 * make_super_instruction() decides whether a given pair is actually
 * fused. Afterwards redundant NOPs are removed from every block and
 * empty basic blocks are eliminated.
 */
static void
insert_superinstructions(cfg_builder *g)
{
    basicblock *block;

    for (block = g->g_entryblock; block != NULL; block = block->b_next) {
        for (int pos = 0; pos < block->b_iused; pos++) {
            cfg_instr *first = &block->b_instr[pos];
            cfg_instr *second = first + 1;
            /* 0 stands for "no following instruction in this block". */
            int following = 0;
            if (pos + 1 < block->b_iused) {
                following = second->i_opcode;
            }

            if (first->i_opcode == LOAD_FAST) {
                if (following == LOAD_FAST) {
                    make_super_instruction(first, second, LOAD_FAST_LOAD_FAST);
                }
            }
            else if (first->i_opcode == STORE_FAST) {
                if (following == LOAD_FAST) {
                    make_super_instruction(first, second, STORE_FAST_LOAD_FAST);
                }
                else if (following == STORE_FAST) {
                    make_super_instruction(first, second, STORE_FAST_STORE_FAST);
                }
            }
        }
    }

    /* Drop the NOPs produced by fusing, then any blocks left empty. */
    for (block = g->g_entryblock; block != NULL; block = block->b_next) {
        remove_redundant_nops(block);
    }
    eliminate_empty_basic_blocks(g);
    assert(no_redundant_nops(g));
}
// helper functions for add_checks_for_loads_of_unknown_variables
static inline void
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
@ -2181,6 +2231,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
RETURN_IF_ERROR(
add_checks_for_loads_of_uninitialized_variables(
g->g_entryblock, nlocals, nparams));
insert_superinstructions(g);
RETURN_IF_ERROR(push_cold_blocks_to_end(g, code_flags));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));

File diff suppressed because it is too large Load diff

View file

@ -1488,10 +1488,7 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp)
}
/* Table with one entry per possible opcode value (256 in total).
 * The entries for the superinstruction opcodes listed below are set
 * to 1; every entry not listed is implicitly zero. */
static const uint8_t super_instructions[256] = {
[LOAD_FAST__LOAD_FAST] = 1,
[LOAD_FAST__LOAD_CONST] = 1,
[STORE_FAST__LOAD_FAST] = 1,
[STORE_FAST__STORE_FAST] = 1,
[LOAD_CONST__LOAD_FAST] = 1,
};

View file

@ -23,18 +23,18 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 0;
case LOAD_FAST_AND_CLEAR:
return 0;
case LOAD_FAST_LOAD_FAST:
return 0;
case LOAD_CONST:
return 0;
case STORE_FAST:
return 1;
case LOAD_FAST__LOAD_FAST:
return 0+0;
case STORE_FAST_LOAD_FAST:
return 1;
case STORE_FAST_STORE_FAST:
return 2;
case LOAD_FAST__LOAD_CONST:
return 0+0;
case STORE_FAST__LOAD_FAST:
return 1+0;
case STORE_FAST__STORE_FAST:
return 1+1;
case LOAD_CONST__LOAD_FAST:
return 0+0;
case POP_TOP:
@ -421,18 +421,18 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case LOAD_FAST_AND_CLEAR:
return 1;
case LOAD_FAST_LOAD_FAST:
return 2;
case LOAD_CONST:
return 1;
case STORE_FAST:
return 0;
case LOAD_FAST__LOAD_FAST:
return 1+1;
case STORE_FAST_LOAD_FAST:
return 1;
case STORE_FAST_STORE_FAST:
return 0;
case LOAD_FAST__LOAD_CONST:
return 1+1;
case STORE_FAST__LOAD_FAST:
return 0+1;
case STORE_FAST__STORE_FAST:
return 0+0;
case LOAD_CONST__LOAD_FAST:
return 1+1;
case POP_TOP:
@ -816,12 +816,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[LOAD_FAST_CHECK] = { true, INSTR_FMT_IB },
[LOAD_FAST] = { true, INSTR_FMT_IB },
[LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB },
[LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB },
[LOAD_CONST] = { true, INSTR_FMT_IB },
[STORE_FAST] = { true, INSTR_FMT_IB },
[LOAD_FAST__LOAD_FAST] = { true, INSTR_FMT_IBIB },
[STORE_FAST_LOAD_FAST] = { true, INSTR_FMT_IB },
[STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB },
[LOAD_FAST__LOAD_CONST] = { true, INSTR_FMT_IBIB },
[STORE_FAST__LOAD_FAST] = { true, INSTR_FMT_IBIB },
[STORE_FAST__STORE_FAST] = { true, INSTR_FMT_IBIB },
[LOAD_CONST__LOAD_FAST] = { true, INSTR_FMT_IBIB },
[POP_TOP] = { true, INSTR_FMT_IX },
[PUSH_NULL] = { true, INSTR_FMT_IX },

View file

@ -87,7 +87,7 @@ static void *opcode_targets[256] = {
&&TARGET_SETUP_ANNOTATIONS,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_LOCALS,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_POP_EXCEPT,
&&TARGET_STORE_NAME,
&&TARGET_DELETE_NAME,
@ -110,9 +110,9 @@ static void *opcode_targets[256] = {
&&TARGET_IMPORT_NAME,
&&TARGET_IMPORT_FROM,
&&TARGET_JUMP_FORWARD,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_POP_JUMP_IF_FALSE,
&&TARGET_POP_JUMP_IF_TRUE,
&&TARGET_LOAD_GLOBAL,
@ -147,29 +147,29 @@ static void *opcode_targets[256] = {
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
&&TARGET_MAP_ADD,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_COPY_FREE_VARS,
&&TARGET_YIELD_VALUE,
&&TARGET_RESUME,
&&TARGET_MATCH_CLASS,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_STORE_SUBSCR_DICT,
&&TARGET_STORE_SUBSCR_LIST_INT,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_STORE_SUBSCR_DICT,
&&TARGET_STORE_SUBSCR_LIST_INT,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_SEND_GEN,
&&TARGET_LIST_EXTEND,
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_SEND_GEN,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_LOAD_FAST_LOAD_FAST,
&&TARGET_STORE_FAST_LOAD_FAST,
&&TARGET_STORE_FAST_STORE_FAST,
&&TARGET_CALL,
&&TARGET_KW_NAMES,
&&TARGET_CALL_INTRINSIC_1,

View file

@ -289,15 +289,6 @@ _PyCode_Quicken(PyCodeObject *code)
case LOAD_FAST << 8 | LOAD_CONST:
instructions[i - 1].op.code = LOAD_FAST__LOAD_CONST;
break;
case LOAD_FAST << 8 | LOAD_FAST:
instructions[i - 1].op.code = LOAD_FAST__LOAD_FAST;
break;
case STORE_FAST << 8 | LOAD_FAST:
instructions[i - 1].op.code = STORE_FAST__LOAD_FAST;
break;
case STORE_FAST << 8 | STORE_FAST:
instructions[i - 1].op.code = STORE_FAST__STORE_FAST;
break;
}
}
#endif /* ENABLE_SPECIALIZATION */
@ -1914,8 +1905,7 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
}
if (PyUnicode_CheckExact(lhs)) {
_Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1];
bool to_store = (next.op.code == STORE_FAST ||
next.op.code == STORE_FAST__LOAD_FAST);
bool to_store = (next.op.code == STORE_FAST);
if (to_store && locals[next.op.arg] == lhs) {
instr->op.code = BINARY_OP_INPLACE_ADD_UNICODE;
goto success;