GH-100982: Break up COMPARE_AND_BRANCH (GH-102801)

This commit is contained in:
Brandt Bucher 2023-03-23 15:25:09 -07:00 committed by GitHub
parent bd063756b3
commit 0444ae2487
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 377 additions and 479 deletions

View file

@ -1687,75 +1687,54 @@ dummy_func(
Py_DECREF(owner);
}
// Instruction family for rich comparisons: the generic COMPARE_OP followed
// by its specialized forms. Elsewhere in this file the specialized bodies
// deopt back to COMPARE_OP unless both operands are exact floats, exact
// compact ints, or exact strs respectively.
// The second argument names the inline-cache size for the family.
family(compare_op, INLINE_CACHE_ENTRIES_COMPARE_OP) = {
COMPARE_OP,
COMPARE_OP_FLOAT,
COMPARE_OP_INT,
COMPARE_OP_STR,
};
// Generic rich comparison. Stack effect: takes `left` and `right`, produces
// `res`. The comparison operator (Py_LT .. Py_GE) is stored in oparg above
// the low four bits, hence the `oparg >> 4` below. `unused/1` reserves one
// cache entry, which the specialization code reads as a _PyCompareOpCache.
inst(COMPARE_OP, (unused/1, left, right -- res)) {
#if ENABLE_SPECIALIZATION
// At this point next_instr addresses the inline cache that follows the
// instruction; its `counter` field is the adaptive counter.
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
// Counter reached zero: try to specialize for these operands.
assert(cframe.use_tracing == 0);
// Step back so next_instr addresses this instruction itself; the
// specializer receives it as `instr` and may rewrite its opcode in place.
next_instr--;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
// Dispatch again, keeping the current oparg.
DISPATCH_SAME_OPARG();
}
// Not specializing this time: record the deferral and count down.
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
#endif /* ENABLE_SPECIALIZATION */
assert((oparg >> 4) <= Py_GE);
res = PyObject_RichCompare(left, right, oparg>>4);
// The inputs are decref'd before the error check, so the failure path
// below does not have to release them separately.
DECREF_INPUTS();
// PyObject_RichCompare returns NULL on failure.
ERROR_IF(res == NULL, error);
}
// No cache size here, since this is a family of super-instructions.
// Members fuse a comparison with the POP_JUMP_IF_TRUE / POP_JUMP_IF_FALSE
// that follows it: the generic COMPARE_AND_BRANCH plus FLOAT, INT and STR
// variants.
family(compare_and_branch) = {
COMPARE_AND_BRANCH,
COMPARE_AND_BRANCH_FLOAT,
COMPARE_AND_BRANCH_INT,
COMPARE_AND_BRANCH_STR,
};
// Fused compare + conditional jump. Stack effect: takes `left` and `right`,
// produces nothing; the comparison result is consumed here to decide whether
// to jump. The comparison operator is in oparg above the low four bits.
// NOTE(review): `unused/2` presumably covers the one cache entry plus the
// following POP_JUMP_IF_* code unit (read below as next_instr[1]) -- confirm
// against the cases generator.
inst(COMPARE_AND_BRANCH, (unused/2, left, right -- )) {
#if ENABLE_SPECIALIZATION
// next_instr addresses the inline cache that follows this instruction.
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
// Counter reached zero: try to specialize for these operands.
assert(cframe.use_tracing == 0);
// Step back so next_instr addresses this instruction, which the
// specializer may rewrite in place.
next_instr--;
_Py_Specialize_CompareAndBranch(left, right, next_instr, oparg);
// Dispatch again, keeping the current oparg.
DISPATCH_SAME_OPARG();
}
// Not specializing this time: record the deferral and count down.
STAT_INC(COMPARE_AND_BRANCH, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
#endif /* ENABLE_SPECIALIZATION */
assert((oparg >> 4) <= Py_GE);
PyObject *cond = PyObject_RichCompare(left, right, oparg>>4);
// Inputs are decref'd before the error check.
DECREF_INPUTS();
ERROR_IF(cond == NULL, error);
// The code unit after the cache entry must be the conditional jump that
// this instruction absorbs.
assert(next_instr[1].op.code == POP_JUMP_IF_FALSE ||
next_instr[1].op.code == POP_JUMP_IF_TRUE);
bool jump_on_true = next_instr[1].op.code == POP_JUMP_IF_TRUE;
// Jump distance comes from the absorbed jump instruction's own oparg.
int offset = next_instr[1].op.arg;
// PyObject_IsTrue yields 1/0 for truthy/falsy and a negative value on error.
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
ERROR_IF(err < 0, error);
// Take the jump when the truthiness matches the polarity of the absorbed
// POP_JUMP_IF_* instruction.
if (jump_on_true == (err != 0)) {
JUMPBY(offset);
}
}
inst(COMPARE_AND_BRANCH_FLOAT, (unused/2, left, right -- )) {
inst(COMPARE_OP_FLOAT, (unused/1, left, right -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_AND_BRANCH);
DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_AND_BRANCH);
STAT_INC(COMPARE_AND_BRANCH, hit);
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
double dleft = PyFloat_AS_DOUBLE(left);
double dright = PyFloat_AS_DOUBLE(right);
// 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg
int sign_ish = COMPARISON_BIT(dleft, dright);
_Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);
if (sign_ish & oparg) {
int offset = next_instr[1].op.arg;
JUMPBY(offset);
}
res = (sign_ish & oparg) ? Py_True : Py_False;
Py_INCREF(res);
}
// Similar to COMPARE_AND_BRANCH_FLOAT
inst(COMPARE_AND_BRANCH_INT, (unused/2, left, right -- )) {
// Similar to COMPARE_OP_FLOAT
inst(COMPARE_OP_INT, (unused/1, left, right -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_AND_BRANCH);
DEOPT_IF(!PyLong_CheckExact(right), COMPARE_AND_BRANCH);
DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left), COMPARE_AND_BRANCH);
DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right), COMPARE_AND_BRANCH);
STAT_INC(COMPARE_AND_BRANCH, hit);
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP);
DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left), COMPARE_OP);
DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
assert(_PyLong_DigitCount((PyLongObject *)left) <= 1 &&
_PyLong_DigitCount((PyLongObject *)right) <= 1);
Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left);
@ -1764,29 +1743,25 @@ dummy_func(
int sign_ish = COMPARISON_BIT(ileft, iright);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
if (sign_ish & oparg) {
int offset = next_instr[1].op.arg;
JUMPBY(offset);
}
res = (sign_ish & oparg) ? Py_True : Py_False;
Py_INCREF(res);
}
// Similar to COMPARE_AND_BRANCH_FLOAT, but for ==, != only
inst(COMPARE_AND_BRANCH_STR, (unused/2, left, right -- )) {
// Similar to COMPARE_OP_FLOAT, but for ==, != only
inst(COMPARE_OP_STR, (unused/1, left, right -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_AND_BRANCH);
DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_AND_BRANCH);
STAT_INC(COMPARE_AND_BRANCH, hit);
int res = _PyUnicode_Equal(left, right);
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
int eq = _PyUnicode_Equal(left, right);
assert((oparg >>4) == Py_EQ || (oparg >>4) == Py_NE);
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc);
assert(res == 0 || res == 1);
assert(eq == 0 || eq == 1);
assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS);
assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS);
if ((res + COMPARISON_NOT_EQUALS) & oparg) {
int offset = next_instr[1].op.arg;
JUMPBY(offset);
}
res = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? Py_True : Py_False;
Py_INCREF(res);
}
inst(IS_OP, (left, right -- b)) {

View file

@ -2800,6 +2800,15 @@ check_compare(struct compiler *c, expr_ty e)
return SUCCESS;
}
/* Maps each rich-comparison operator (Py_LT .. Py_GE, used as the index) to
 * the COMPARISON_* outcome bits for which that operator is true.
 * Operators that accept more than one outcome (<=, >=) OR the bits together.
 * compiler_addcompare() ORs the selected mask into the low bits of the
 * COMPARE_OP oparg. */
static const int compare_masks[] = {
[Py_LT] = COMPARISON_LESS_THAN,
[Py_LE] = COMPARISON_LESS_THAN | COMPARISON_EQUALS,
[Py_EQ] = COMPARISON_EQUALS,
[Py_NE] = COMPARISON_NOT_EQUALS,
[Py_GT] = COMPARISON_GREATER_THAN,
[Py_GE] = COMPARISON_GREATER_THAN | COMPARISON_EQUALS,
};
static int compiler_addcompare(struct compiler *c, location loc,
cmpop_ty op)
{
@ -2840,7 +2849,7 @@ static int compiler_addcompare(struct compiler *c, location loc,
}
/* cmp goes in top bits of the oparg, while the low bits are used by quickened
* versions of this opcode to store the comparison mask. */
ADDOP_I(c, loc, COMPARE_OP, cmp << 4);
ADDOP_I(c, loc, COMPARE_OP, (cmp << 4) | compare_masks[cmp]);
return SUCCESS;
}

File diff suppressed because it is too large Load diff

View file

@ -215,13 +215,11 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 2;
case COMPARE_OP:
return 2;
case COMPARE_AND_BRANCH:
case COMPARE_OP_FLOAT:
return 2;
case COMPARE_AND_BRANCH_FLOAT:
case COMPARE_OP_INT:
return 2;
case COMPARE_AND_BRANCH_INT:
return 2;
case COMPARE_AND_BRANCH_STR:
case COMPARE_OP_STR:
return 2;
case IS_OP:
return 2;
@ -563,14 +561,12 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case COMPARE_OP:
return 1;
case COMPARE_AND_BRANCH:
return 0;
case COMPARE_AND_BRANCH_FLOAT:
return 0;
case COMPARE_AND_BRANCH_INT:
return 0;
case COMPARE_AND_BRANCH_STR:
return 0;
case COMPARE_OP_FLOAT:
return 1;
case COMPARE_OP_INT:
return 1;
case COMPARE_OP_STR:
return 1;
case IS_OP:
return 1;
case CONTAINS_OP:
@ -699,7 +695,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
}
#endif
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC0, INSTR_FMT_IBC000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000 };
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000 };
struct opcode_metadata {
bool valid_entry;
enum InstructionFormat instr_format;
@ -812,10 +808,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000 },
[STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000 },
[COMPARE_OP] = { true, INSTR_FMT_IBC },
[COMPARE_AND_BRANCH] = { true, INSTR_FMT_IBC0 },
[COMPARE_AND_BRANCH_FLOAT] = { true, INSTR_FMT_IBC0 },
[COMPARE_AND_BRANCH_INT] = { true, INSTR_FMT_IBC0 },
[COMPARE_AND_BRANCH_STR] = { true, INSTR_FMT_IBC0 },
[COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC },
[COMPARE_OP_INT] = { true, INSTR_FMT_IBC },
[COMPARE_OP_STR] = { true, INSTR_FMT_IBC },
[IS_OP] = { true, INSTR_FMT_IB },
[CONTAINS_OP] = { true, INSTR_FMT_IB },
[CHECK_EG_MATCH] = { true, INSTR_FMT_IX },

View file

@ -47,7 +47,7 @@ static void *opcode_targets[256] = {
&&TARGET_CALL_NO_KW_STR_1,
&&TARGET_CALL_NO_KW_TUPLE_1,
&&TARGET_CALL_NO_KW_TYPE_1,
&&TARGET_COMPARE_AND_BRANCH_FLOAT,
&&TARGET_COMPARE_OP_FLOAT,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
@ -55,8 +55,8 @@ static void *opcode_targets[256] = {
&&TARGET_BEFORE_WITH,
&&TARGET_END_ASYNC_FOR,
&&TARGET_CLEANUP_THROW,
&&TARGET_COMPARE_AND_BRANCH_INT,
&&TARGET_COMPARE_AND_BRANCH_STR,
&&TARGET_COMPARE_OP_INT,
&&TARGET_COMPARE_OP_STR,
&&TARGET_FOR_ITER_LIST,
&&TARGET_FOR_ITER_TUPLE,
&&TARGET_STORE_SUBSCR,
@ -140,9 +140,9 @@ static void *opcode_targets[256] = {
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
&&TARGET_JUMP_BACKWARD,
&&TARGET_COMPARE_AND_BRANCH,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_STORE_SUBSCR_LIST_INT,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
@ -152,15 +152,15 @@ static void *opcode_targets[256] = {
&&TARGET_YIELD_VALUE,
&&TARGET_RESUME,
&&TARGET_MATCH_CLASS,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_SEND_GEN,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_LIST_EXTEND,
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,

View file

@ -264,15 +264,6 @@ do { \
#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif
// Maps each rich-comparison operator (Py_LT .. Py_GE, used as the index) to
// the COMPARISON_* outcome bits for which that operator is true.
// _PyCode_Quicken() looks up the mask by `oparg >> 4`, flips its low four
// bits (`^ 0xf`) when the following jump is POP_JUMP_IF_FALSE, and stores it
// in the low bits of the rewritten instruction's oparg.
static int compare_masks[] = {
[Py_LT] = COMPARISON_LESS_THAN,
[Py_LE] = COMPARISON_LESS_THAN | COMPARISON_EQUALS,
[Py_EQ] = COMPARISON_EQUALS,
[Py_NE] = COMPARISON_NOT_EQUALS,
[Py_GT] = COMPARISON_GREATER_THAN,
[Py_GE] = COMPARISON_GREATER_THAN | COMPARISON_EQUALS,
};
// Initialize warmup counters and insert superinstructions. This cannot fail.
void
_PyCode_Quicken(PyCodeObject *code)
@ -305,19 +296,6 @@ _PyCode_Quicken(PyCodeObject *code)
case STORE_FAST << 8 | STORE_FAST:
instructions[i - 1].op.code = STORE_FAST__STORE_FAST;
break;
case COMPARE_OP << 8 | POP_JUMP_IF_TRUE:
case COMPARE_OP << 8 | POP_JUMP_IF_FALSE:
{
int oparg = instructions[i - 1 - INLINE_CACHE_ENTRIES_COMPARE_OP].op.arg;
assert((oparg >> 4) <= Py_GE);
int mask = compare_masks[oparg >> 4];
if (opcode == POP_JUMP_IF_FALSE) {
mask = mask ^ 0xf;
}
instructions[i - 1 - INLINE_CACHE_ENTRIES_COMPARE_OP].op.code = COMPARE_AND_BRANCH;
instructions[i - 1 - INLINE_CACHE_ENTRIES_COMPARE_OP].op.arg = (oparg & 0xf0) | mask;
break;
}
}
}
#endif /* ENABLE_SPECIALIZATION */
@ -436,19 +414,17 @@ _PyCode_Quicken(PyCodeObject *code)
#define SPEC_FAIL_CALL_OPERATOR_WRAPPER 29
/* COMPARE_OP */
#define SPEC_FAIL_COMPARE_DIFFERENT_TYPES 12
#define SPEC_FAIL_COMPARE_STRING 13
#define SPEC_FAIL_COMPARE_NOT_FOLLOWED_BY_COND_JUMP 14
#define SPEC_FAIL_COMPARE_BIG_INT 15
#define SPEC_FAIL_COMPARE_BYTES 16
#define SPEC_FAIL_COMPARE_TUPLE 17
#define SPEC_FAIL_COMPARE_LIST 18
#define SPEC_FAIL_COMPARE_SET 19
#define SPEC_FAIL_COMPARE_BOOL 20
#define SPEC_FAIL_COMPARE_BASEOBJECT 21
#define SPEC_FAIL_COMPARE_FLOAT_LONG 22
#define SPEC_FAIL_COMPARE_LONG_FLOAT 23
#define SPEC_FAIL_COMPARE_EXTENDED_ARG 24
#define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12
#define SPEC_FAIL_COMPARE_OP_STRING 13
#define SPEC_FAIL_COMPARE_OP_BIG_INT 14
#define SPEC_FAIL_COMPARE_OP_BYTES 15
#define SPEC_FAIL_COMPARE_OP_TUPLE 16
#define SPEC_FAIL_COMPARE_OP_LIST 17
#define SPEC_FAIL_COMPARE_OP_SET 18
#define SPEC_FAIL_COMPARE_OP_BOOL 19
#define SPEC_FAIL_COMPARE_OP_BASEOBJECT 20
#define SPEC_FAIL_COMPARE_OP_FLOAT_LONG 21
#define SPEC_FAIL_COMPARE_OP_LONG_FLOAT 22
/* FOR_ITER */
#define SPEC_FAIL_FOR_ITER_GENERATOR 10
@ -1958,83 +1934,79 @@ compare_op_fail_kind(PyObject *lhs, PyObject *rhs)
{
if (Py_TYPE(lhs) != Py_TYPE(rhs)) {
if (PyFloat_CheckExact(lhs) && PyLong_CheckExact(rhs)) {
return SPEC_FAIL_COMPARE_FLOAT_LONG;
return SPEC_FAIL_COMPARE_OP_FLOAT_LONG;
}
if (PyLong_CheckExact(lhs) && PyFloat_CheckExact(rhs)) {
return SPEC_FAIL_COMPARE_LONG_FLOAT;
return SPEC_FAIL_COMPARE_OP_LONG_FLOAT;
}
return SPEC_FAIL_COMPARE_DIFFERENT_TYPES;
return SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES;
}
if (PyBytes_CheckExact(lhs)) {
return SPEC_FAIL_COMPARE_BYTES;
return SPEC_FAIL_COMPARE_OP_BYTES;
}
if (PyTuple_CheckExact(lhs)) {
return SPEC_FAIL_COMPARE_TUPLE;
return SPEC_FAIL_COMPARE_OP_TUPLE;
}
if (PyList_CheckExact(lhs)) {
return SPEC_FAIL_COMPARE_LIST;
return SPEC_FAIL_COMPARE_OP_LIST;
}
if (PySet_CheckExact(lhs) || PyFrozenSet_CheckExact(lhs)) {
return SPEC_FAIL_COMPARE_SET;
return SPEC_FAIL_COMPARE_OP_SET;
}
if (PyBool_Check(lhs)) {
return SPEC_FAIL_COMPARE_BOOL;
return SPEC_FAIL_COMPARE_OP_BOOL;
}
if (Py_TYPE(lhs)->tp_richcompare == PyBaseObject_Type.tp_richcompare) {
return SPEC_FAIL_COMPARE_BASEOBJECT;
return SPEC_FAIL_COMPARE_OP_BASEOBJECT;
}
return SPEC_FAIL_OTHER;
}
#endif
void
_Py_Specialize_CompareAndBranch(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg)
{
assert(ENABLE_SPECIALIZATION);
assert(_PyOpcode_Caches[COMPARE_AND_BRANCH] == INLINE_CACHE_ENTRIES_COMPARE_OP);
assert(_PyOpcode_Caches[COMPARE_OP] == INLINE_CACHE_ENTRIES_COMPARE_OP);
_PyCompareOpCache *cache = (_PyCompareOpCache *)(instr + 1);
#ifndef NDEBUG
int next_opcode = instr[INLINE_CACHE_ENTRIES_COMPARE_OP + 1].op.code;
assert(next_opcode == POP_JUMP_IF_FALSE || next_opcode == POP_JUMP_IF_TRUE);
#endif
if (Py_TYPE(lhs) != Py_TYPE(rhs)) {
SPECIALIZATION_FAIL(COMPARE_AND_BRANCH, compare_op_fail_kind(lhs, rhs));
SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
goto failure;
}
if (PyFloat_CheckExact(lhs)) {
instr->op.code = COMPARE_AND_BRANCH_FLOAT;
instr->op.code = COMPARE_OP_FLOAT;
goto success;
}
if (PyLong_CheckExact(lhs)) {
if (_PyLong_IsCompact((PyLongObject *)lhs) && _PyLong_IsCompact((PyLongObject *)rhs)) {
instr->op.code = COMPARE_AND_BRANCH_INT;
instr->op.code = COMPARE_OP_INT;
goto success;
}
else {
SPECIALIZATION_FAIL(COMPARE_AND_BRANCH, SPEC_FAIL_COMPARE_BIG_INT);
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_BIG_INT);
goto failure;
}
}
if (PyUnicode_CheckExact(lhs)) {
int cmp = oparg >> 4;
if (cmp != Py_EQ && cmp != Py_NE) {
SPECIALIZATION_FAIL(COMPARE_AND_BRANCH, SPEC_FAIL_COMPARE_STRING);
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_STRING);
goto failure;
}
else {
instr->op.code = COMPARE_AND_BRANCH_STR;
instr->op.code = COMPARE_OP_STR;
goto success;
}
}
SPECIALIZATION_FAIL(COMPARE_AND_BRANCH, compare_op_fail_kind(lhs, rhs));
SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
failure:
STAT_INC(COMPARE_AND_BRANCH, failure);
instr->op.code = COMPARE_AND_BRANCH;
STAT_INC(COMPARE_OP, failure);
instr->op.code = COMPARE_OP;
cache->counter = adaptive_counter_backoff(cache->counter);
return;
success:
STAT_INC(COMPARE_AND_BRANCH, success);
STAT_INC(COMPARE_OP, success);
cache->counter = adaptive_counter_cooldown();
}