gh-134584: Specialize POP_TOP by reference and type in JIT (GH-135761)

This commit is contained in:
Ken Jin 2025-06-24 00:57:14 +08:00 committed by GitHub
parent 99712c45cc
commit 569fc6870f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 239 additions and 62 deletions

View file

@ -284,72 +284,76 @@ extern "C" {
#define _POP_JUMP_IF_FALSE 500
#define _POP_JUMP_IF_TRUE 501
#define _POP_TOP POP_TOP
#define _POP_TOP_LOAD_CONST_INLINE 502
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503
#define _POP_TWO 504
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505
#define _POP_TOP_FLOAT 502
#define _POP_TOP_INT 503
#define _POP_TOP_LOAD_CONST_INLINE 504
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505
#define _POP_TOP_NOP 506
#define _POP_TOP_UNICODE 507
#define _POP_TWO 508
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509
#define _PUSH_EXC_INFO PUSH_EXC_INFO
#define _PUSH_FRAME 506
#define _PUSH_FRAME 510
#define _PUSH_NULL PUSH_NULL
#define _PUSH_NULL_CONDITIONAL 507
#define _PY_FRAME_GENERAL 508
#define _PY_FRAME_KW 509
#define _QUICKEN_RESUME 510
#define _REPLACE_WITH_TRUE 511
#define _PUSH_NULL_CONDITIONAL 511
#define _PY_FRAME_GENERAL 512
#define _PY_FRAME_KW 513
#define _QUICKEN_RESUME 514
#define _REPLACE_WITH_TRUE 515
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
#define _SAVE_RETURN_OFFSET 512
#define _SEND 513
#define _SEND_GEN_FRAME 514
#define _SAVE_RETURN_OFFSET 516
#define _SEND 517
#define _SEND_GEN_FRAME 518
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
#define _START_EXECUTOR 515
#define _STORE_ATTR 516
#define _STORE_ATTR_INSTANCE_VALUE 517
#define _STORE_ATTR_SLOT 518
#define _STORE_ATTR_WITH_HINT 519
#define _START_EXECUTOR 519
#define _STORE_ATTR 520
#define _STORE_ATTR_INSTANCE_VALUE 521
#define _STORE_ATTR_SLOT 522
#define _STORE_ATTR_WITH_HINT 523
#define _STORE_DEREF STORE_DEREF
#define _STORE_FAST 520
#define _STORE_FAST_0 521
#define _STORE_FAST_1 522
#define _STORE_FAST_2 523
#define _STORE_FAST_3 524
#define _STORE_FAST_4 525
#define _STORE_FAST_5 526
#define _STORE_FAST_6 527
#define _STORE_FAST_7 528
#define _STORE_FAST 524
#define _STORE_FAST_0 525
#define _STORE_FAST_1 526
#define _STORE_FAST_2 527
#define _STORE_FAST_3 528
#define _STORE_FAST_4 529
#define _STORE_FAST_5 530
#define _STORE_FAST_6 531
#define _STORE_FAST_7 532
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
#define _STORE_SLICE 529
#define _STORE_SUBSCR 530
#define _STORE_SUBSCR_DICT 531
#define _STORE_SUBSCR_LIST_INT 532
#define _SWAP 533
#define _SWAP_2 534
#define _SWAP_3 535
#define _TIER2_RESUME_CHECK 536
#define _TO_BOOL 537
#define _STORE_SLICE 533
#define _STORE_SUBSCR 534
#define _STORE_SUBSCR_DICT 535
#define _STORE_SUBSCR_LIST_INT 536
#define _SWAP 537
#define _SWAP_2 538
#define _SWAP_3 539
#define _TIER2_RESUME_CHECK 540
#define _TO_BOOL 541
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST 538
#define _TO_BOOL_LIST 542
#define _TO_BOOL_NONE TO_BOOL_NONE
#define _TO_BOOL_STR 539
#define _TO_BOOL_STR 543
#define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
#define _UNPACK_SEQUENCE 540
#define _UNPACK_SEQUENCE_LIST 541
#define _UNPACK_SEQUENCE_TUPLE 542
#define _UNPACK_SEQUENCE_TWO_TUPLE 543
#define _UNPACK_SEQUENCE 544
#define _UNPACK_SEQUENCE_LIST 545
#define _UNPACK_SEQUENCE_TUPLE 546
#define _UNPACK_SEQUENCE_TWO_TUPLE 547
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
#define MAX_UOP_ID 543
#define MAX_UOP_ID 547
#ifdef __cplusplus
}

View file

@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
[_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
[_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_POP_TOP_NOP] = 0,
[_POP_TOP_INT] = 0,
[_POP_TOP_FLOAT] = 0,
[_POP_TOP_UNICODE] = 0,
[_POP_TWO] = HAS_ESCAPES_FLAG,
[_PUSH_NULL] = HAS_PURE_FLAG,
[_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG,
@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_POP_EXCEPT] = "_POP_EXCEPT",
[_POP_ITER] = "_POP_ITER",
[_POP_TOP] = "_POP_TOP",
[_POP_TOP_FLOAT] = "_POP_TOP_FLOAT",
[_POP_TOP_INT] = "_POP_TOP_INT",
[_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE",
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW",
[_POP_TOP_NOP] = "_POP_TOP_NOP",
[_POP_TOP_UNICODE] = "_POP_TOP_UNICODE",
[_POP_TWO] = "_POP_TWO",
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW",
[_PUSH_EXC_INFO] = "_PUSH_EXC_INFO",
@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg)
return 2;
case _POP_TOP:
return 1;
case _POP_TOP_NOP:
return 1;
case _POP_TOP_INT:
return 1;
case _POP_TOP_FLOAT:
return 1;
case _POP_TOP_UNICODE:
return 1;
case _POP_TWO:
return 2;
case _PUSH_NULL:

View file

@ -2392,6 +2392,46 @@ class TestUopsOptimization(unittest.TestCase):
assert ex is not None
"""))
def test_pop_top_specialize_none(self):
    """Check that a hot loop discarding ``None`` uses ``_POP_TOP_NOP``.

    The loop body calls ``global_identity(None)`` as a bare expression
    statement, so its result is thrown away. After running the loop
    ``TIER2_THRESHOLD`` times, the first executor attached to ``testfunc``
    must exist and must contain the ``_POP_TOP_NOP`` uop.
    """
    def testfunc(n):
        for _ in range(n):
            global_identity(None)

    testfunc(TIER2_THRESHOLD)

    ex = get_first_executor(testfunc)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_POP_TOP_NOP", uops)
def test_pop_top_specialize_int(self):
    """Check that a hot loop discarding an ``int`` uses ``_POP_TOP_INT``.

    The loop body calls ``global_identity(100000)`` as a bare expression
    statement, so its result is thrown away. After running the loop
    ``TIER2_THRESHOLD`` times, the first executor attached to ``testfunc``
    must exist and must contain the ``_POP_TOP_INT`` uop.
    """
    def testfunc(n):
        for _ in range(n):
            global_identity(100000)

    testfunc(TIER2_THRESHOLD)

    ex = get_first_executor(testfunc)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_POP_TOP_INT", uops)
def test_pop_top_specialize_float(self):
    """Check that a hot loop discarding a ``float`` uses ``_POP_TOP_FLOAT``.

    The loop body calls ``global_identity(1e6)`` as a bare expression
    statement, so its result is thrown away. After running the loop
    ``TIER2_THRESHOLD`` times, the first executor attached to ``testfunc``
    must exist and must contain the ``_POP_TOP_FLOAT`` uop.
    """
    def testfunc(n):
        for _ in range(n):
            global_identity(1e6)

    testfunc(TIER2_THRESHOLD)

    ex = get_first_executor(testfunc)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_POP_TOP_FLOAT", uops)
def global_identity(x):
    """Return *x* unchanged (the very same object that was passed in)."""
    return x

View file

@ -0,0 +1 @@
Specialize :opcode:`POP_TOP` in the JIT compiler by reference lifetime and by type. This also makes top-of-stack caching easier to implement in the JIT compiler.

View file

@ -344,6 +344,27 @@ dummy_func(
PyStackRef_XCLOSE(value);
}
// Pop the top-of-stack value without closing (releasing) it.
// The assert states the precondition under which this is used: the stackref
// is null, or it does not carry its refcount on the object, or the object
// it refers to is immortal.
op(_POP_TOP_NOP, (value --)) {
assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) ||
_Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value))));
// No PyStackRef_CLOSE here on purpose.
// NOTE(review): DEAD presumably tells the cases generator that `value` is
// consumed even though it is never closed -- confirm against the generator.
DEAD(value);
}
// Pop a value that is asserted to be an exact int and close it through
// PyStackRef_CLOSE_SPECIALIZED, passing the int-specific deallocator
// _PyLong_ExactDealloc.
op(_POP_TOP_INT, (value --)) {
assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc);
}
// Pop a value that is asserted to be an exact float and close it through
// PyStackRef_CLOSE_SPECIALIZED, passing the float-specific deallocator
// _PyFloat_ExactDealloc.
op(_POP_TOP_FLOAT, (value --)) {
assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc);
}
// Pop a value that is asserted to be an exact str and close it through
// PyStackRef_CLOSE_SPECIALIZED, passing the str-specific deallocator
// _PyUnicode_ExactDealloc.
op(_POP_TOP_UNICODE, (value --)) {
assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc);
}
tier2 op(_POP_TWO, (nos, tos --)) {
PyStackRef_CLOSE(tos);
PyStackRef_CLOSE(nos);

View file

@ -539,6 +539,46 @@
break;
}
// Executor case for _POP_TOP_NOP: drop the top stack slot without closing it.
case _POP_TOP_NOP: {
_PyStackRef value;
value = stack_pointer[-1];
// Precondition: null ref, refcount not held on the object, or immortal.
assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) ||
_Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value))));
// Only the stack pointer moves; `value` is not released.
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Executor case for _POP_TOP_INT: close an exact int with its dedicated
// deallocator, then pop the slot.
case _POP_TOP_INT: {
_PyStackRef value;
value = stack_pointer[-1];
assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Executor case for _POP_TOP_FLOAT: close an exact float with its dedicated
// deallocator, then pop the slot.
case _POP_TOP_FLOAT: {
_PyStackRef value;
value = stack_pointer[-1];
assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Executor case for _POP_TOP_UNICODE: close an exact str with its dedicated
// deallocator, then pop the slot.
case _POP_TOP_UNICODE: {
_PyStackRef value;
value = stack_pointer[-1];
assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value)));
PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _POP_TWO: {
_PyStackRef tos;
_PyStackRef nos;

View file

@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
#define sym_new_tuple _Py_uop_sym_new_tuple
#define sym_tuple_getitem _Py_uop_sym_tuple_getitem
#define sym_tuple_length _Py_uop_sym_tuple_length
#define sym_is_immortal _Py_uop_sym_is_immortal
#define sym_is_immortal _Py_uop_symbol_is_immortal
#define sym_is_compact_int _Py_uop_sym_is_compact_int
#define sym_new_compact_int _Py_uop_sym_new_compact_int
#define sym_new_truthiness _Py_uop_sym_new_truthiness

View file

@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
#define sym_new_tuple _Py_uop_sym_new_tuple
#define sym_tuple_getitem _Py_uop_sym_tuple_getitem
#define sym_tuple_length _Py_uop_sym_tuple_length
#define sym_is_immortal _Py_uop_sym_is_immortal
#define sym_is_immortal _Py_uop_symbol_is_immortal
#define sym_new_compact_int _Py_uop_sym_new_compact_int
#define sym_is_compact_int _Py_uop_sym_is_compact_int
#define sym_new_truthiness _Py_uop_sym_new_truthiness
@ -534,7 +534,7 @@ dummy_func(void) {
}
op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
value = sym_new_const(ctx, ptr);
}
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
@ -542,7 +542,7 @@ dummy_func(void) {
}
op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) {
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
value = sym_new_const(ctx, ptr);
}
op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) {
@ -561,6 +561,24 @@ dummy_func(void) {
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
}
// Optimizer rule for _POP_TOP: replace the instruction with a specialized
// pop when the abstract value allows it. Branch order matters: the no-op
// form is tried before the type-specific forms. If no branch matches, the
// instruction is left as a plain _POP_TOP.
op(_POP_TOP, (value -- )) {
PyTypeObject *typ = sym_get_type(value);
// Borrowed reference, immortal symbol, or null: use the pop that does not
// close the value.
if (PyJitRef_IsBorrowed(value) ||
sym_is_immortal(PyJitRef_Unwrap(value)) ||
sym_is_null(value)) {
REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0);
}
// Otherwise choose by the type reported by sym_get_type: int, float, str.
else if (typ == &PyLong_Type) {
REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0);
}
else if (typ == &PyFloat_Type) {
REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0);
}
else if (typ == &PyUnicode_Type) {
REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0);
}
}
op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
assert(oparg > 0);
top = bottom;
@ -803,7 +821,9 @@ dummy_func(void) {
}
op(_RETURN_VALUE, (retval -- res)) {
JitOptRef temp = retval;
// We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe
// in bytecodes.c
JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval));
DEAD(retval);
SAVE_STACK();
ctx->frame->stack_pointer = stack_pointer;

View file

@ -100,6 +100,47 @@
}
// Optimizer case for _POP_TOP: try to rewrite this_instr into a specialized
// pop, then model the stack effect (one value popped).
case _POP_TOP: {
JitOptRef value;
value = stack_pointer[-1];
PyTypeObject *typ = sym_get_type(value);
// Borrowed reference, immortal symbol, or null: use the pop that does not
// close the value.
if (PyJitRef_IsBorrowed(value) ||
sym_is_immortal(PyJitRef_Unwrap(value)) ||
sym_is_null(value)) {
REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0);
}
// Otherwise choose by the type reported by sym_get_type: int, float, str.
// With no match the instruction stays a plain _POP_TOP.
else if (typ == &PyLong_Type) {
REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0);
}
else if (typ == &PyFloat_Type) {
REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0);
}
else if (typ == &PyUnicode_Type) {
REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0);
}
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Optimizer case for _POP_TOP_NOP: only the stack effect is modeled
// (one value popped); the instruction is not rewritten.
case _POP_TOP_NOP: {
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Optimizer case for _POP_TOP_INT: only the stack effect is modeled
// (one value popped); the instruction is not rewritten.
case _POP_TOP_INT: {
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
// Optimizer case for _POP_TOP_FLOAT: only the stack effect is modeled
// (one value popped); the instruction is not rewritten.
case _POP_TOP_FLOAT: {
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _POP_TOP_UNICODE: {
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
@ -784,7 +825,7 @@
JitOptRef retval;
JitOptRef res;
retval = stack_pointer[-1];
JitOptRef temp = retval;
JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval));
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
ctx->frame->stack_pointer = stack_pointer;
@ -2660,7 +2701,7 @@
case _LOAD_CONST_INLINE: {
JitOptRef value;
PyObject *ptr = (PyObject *)this_instr->operand0;
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
value = sym_new_const(ctx, ptr);
stack_pointer[0] = value;
stack_pointer += 1;
assert(WITHIN_STACK_BOUNDS());
@ -2670,7 +2711,7 @@
case _POP_TOP_LOAD_CONST_INLINE: {
JitOptRef value;
PyObject *ptr = (PyObject *)this_instr->operand0;
value = PyJitRef_Borrow(sym_new_const(ctx, ptr));
value = sym_new_const(ctx, ptr);
stack_pointer[-1] = value;
break;
}

View file

@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym)
if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) {
return sym->cls.type == &PyBool_Type;
}
if (sym->tag == JIT_SYM_TRUTHINESS_TAG) {
return true;
}
return false;
}

View file

@ -596,6 +596,7 @@ NON_ESCAPING_FUNCTIONS = (
"PyStackRef_IsNull",
"PyStackRef_MakeHeapSafe",
"PyStackRef_None",
"PyStackRef_RefcountOnObject",
"PyStackRef_TYPE",
"PyStackRef_True",
"PyTuple_GET_ITEM",

View file

@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None:
assert name2 in analysis.instructions, f"{name2} doesn't match any instr"
instr1 = analysis.instructions[name1]
instr2 = analysis.instructions[name2]
assert (
len(instr1.parts) == 1
), f"{name1} is not a good superinstruction part"
assert (
len(instr2.parts) == 1
), f"{name2} is not a good superinstruction part"
expansions.append((instr1.parts[0].name, "OPARG_TOP", 0))
expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0))
for part in instr1.parts:
expansions.append((part.name, "OPARG_TOP", 0))
for part in instr2.parts:
expansions.append((part.name, "OPARG_BOTTOM", 0))
elif not is_viable_expansion(inst):
continue
else: