GH-100923: Embed jump mask in COMPARE_OP oparg (GH-100924)

This commit is contained in:
Mark Shannon 2023-01-11 20:40:43 +00:00 committed by GitHub
parent 61f12b8ff7
commit 6e4e14d98f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 200 additions and 177 deletions

View file

@ -88,7 +88,7 @@ static PyObject *aiter, *awaitable, *iterable, *w, *exc_value, *bc;
static PyObject *orig, *excs, *update, *b, *fromlist, *level, *from;
static size_t jump;
// Dummy variables for cache effects
static uint16_t when_to_jump_mask, invert, counter, index, hint;
static uint16_t invert, counter, index, hint;
static uint32_t type_version;
// Dummy opcode names for 'op' opcodes
#define _COMPARE_OP_FLOAT 1003
@ -1836,7 +1836,7 @@ dummy_func(
_COMPARE_OP_STR,
};
inst(COMPARE_OP, (unused/2, left, right -- res)) {
inst(COMPARE_OP, (unused/1, left, right -- res)) {
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
assert(cframe.use_tracing == 0);
@ -1846,15 +1846,15 @@ dummy_func(
}
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
assert(oparg <= Py_GE);
res = PyObject_RichCompare(left, right, oparg);
assert((oparg >> 4) <= Py_GE);
res = PyObject_RichCompare(left, right, oparg>>4);
Py_DECREF(left);
Py_DECREF(right);
ERROR_IF(res == NULL, error);
}
// The result is an int disguised as an object pointer.
op(_COMPARE_OP_FLOAT, (unused/1, when_to_jump_mask/1, left, right -- jump: size_t)) {
op(_COMPARE_OP_FLOAT, (unused/1, left, right -- jump: size_t)) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
@ -1862,11 +1862,11 @@ dummy_func(
STAT_INC(COMPARE_OP, hit);
double dleft = PyFloat_AS_DOUBLE(left);
double dright = PyFloat_AS_DOUBLE(right);
// 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches when_to_jump_mask
int sign_ish = 1 << (2 * (dleft >= dright) + (dleft <= dright));
// 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg
int sign_ish = COMPARISON_BIT(dleft, dright);
_Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);
jump = sign_ish & when_to_jump_mask;
jump = sign_ish & oparg;
}
// The input is an int disguised as an object pointer!
op(_JUMP_IF, (jump: size_t --)) {
@ -1879,7 +1879,7 @@ dummy_func(
super(COMPARE_OP_FLOAT_JUMP) = _COMPARE_OP_FLOAT + _JUMP_IF;
// Similar to COMPARE_OP_FLOAT
op(_COMPARE_OP_INT, (unused/1, when_to_jump_mask/1, left, right -- jump: size_t)) {
op(_COMPARE_OP_INT, (unused/1, left, right -- jump: size_t)) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
@ -1890,29 +1890,31 @@ dummy_func(
assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1);
Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];
Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
// 2 if <, 4 if >, 8 if ==; this matches when_to_jump_mask
int sign_ish = 1 << (2 * (ileft >= iright) + (ileft <= iright));
// 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg
int sign_ish = COMPARISON_BIT(ileft, iright);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
jump = sign_ish & when_to_jump_mask;
jump = sign_ish & oparg;
}
super(COMPARE_OP_INT_JUMP) = _COMPARE_OP_INT + _JUMP_IF;
// Similar to COMPARE_OP_FLOAT, but for ==, != only
op(_COMPARE_OP_STR, (unused/1, invert/1, left, right -- jump: size_t)) {
op(_COMPARE_OP_STR, (unused/1, left, right -- jump: size_t)) {
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
int res = _PyUnicode_Equal(left, right);
assert(oparg == Py_EQ || oparg == Py_NE);
assert((oparg >>4) == Py_EQ || (oparg >>4) == Py_NE);
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc);
assert(res == 0 || res == 1);
assert(invert == 0 || invert == 1);
jump = res ^ invert;
assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS);
assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS);
jump = (res + COMPARISON_NOT_EQUALS) & oparg;
}
super(COMPARE_OP_STR_JUMP) = _COMPARE_OP_STR + _JUMP_IF;
inst(IS_OP, (left, right -- b)) {

View file

@ -2887,7 +2887,9 @@ static int compiler_addcompare(struct compiler *c, location loc,
default:
Py_UNREACHABLE();
}
ADDOP_I(c, loc, COMPARE_OP, cmp);
/* cmp goes in top bits of the oparg, while the low bits are used by quickened
* versions of this opcode to store the comparison mask. */
ADDOP_I(c, loc, COMPARE_OP, cmp << 4);
return SUCCESS;
}

View file

@ -2090,14 +2090,14 @@
}
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
assert(oparg <= Py_GE);
res = PyObject_RichCompare(left, right, oparg);
assert((oparg >> 4) <= Py_GE);
res = PyObject_RichCompare(left, right, oparg>>4);
Py_DECREF(left);
Py_DECREF(right);
if (res == NULL) goto pop_2_error;
STACK_SHRINK(1);
POKE(1, res);
JUMPBY(2);
JUMPBY(1);
DISPATCH();
}
@ -2108,7 +2108,6 @@
PyObject *right = _tmp_1;
PyObject *left = _tmp_2;
size_t jump;
uint16_t when_to_jump_mask = read_u16(&next_instr[1].cache);
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
@ -2116,14 +2115,14 @@
STAT_INC(COMPARE_OP, hit);
double dleft = PyFloat_AS_DOUBLE(left);
double dright = PyFloat_AS_DOUBLE(right);
// 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches when_to_jump_mask
int sign_ish = 1 << (2 * (dleft >= dright) + (dleft <= dright));
// 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg
int sign_ish = COMPARISON_BIT(dleft, dright);
_Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);
jump = sign_ish & when_to_jump_mask;
jump = sign_ish & oparg;
_tmp_2 = (PyObject *)jump;
}
JUMPBY(2);
JUMPBY(1);
NEXTOPARG();
JUMPBY(1);
{
@ -2144,7 +2143,6 @@
PyObject *right = _tmp_1;
PyObject *left = _tmp_2;
size_t jump;
uint16_t when_to_jump_mask = read_u16(&next_instr[1].cache);
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
@ -2155,14 +2153,14 @@
assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1);
Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];
Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
// 2 if <, 4 if >, 8 if ==; this matches when_to_jump_mask
int sign_ish = 1 << (2 * (ileft >= iright) + (ileft <= iright));
// 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg
int sign_ish = COMPARISON_BIT(ileft, iright);
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
jump = sign_ish & when_to_jump_mask;
jump = sign_ish & oparg;
_tmp_2 = (PyObject *)jump;
}
JUMPBY(2);
JUMPBY(1);
NEXTOPARG();
JUMPBY(1);
{
@ -2183,22 +2181,22 @@
PyObject *right = _tmp_1;
PyObject *left = _tmp_2;
size_t jump;
uint16_t invert = read_u16(&next_instr[1].cache);
assert(cframe.use_tracing == 0);
// Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
STAT_INC(COMPARE_OP, hit);
int res = _PyUnicode_Equal(left, right);
assert(oparg == Py_EQ || oparg == Py_NE);
assert((oparg >>4) == Py_EQ || (oparg >>4) == Py_NE);
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc);
_Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc);
assert(res == 0 || res == 1);
assert(invert == 0 || invert == 1);
jump = res ^ invert;
assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS);
assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS);
jump = (res + COMPARISON_NOT_EQUALS) & oparg;
_tmp_2 = (PyObject *)jump;
}
JUMPBY(2);
JUMPBY(1);
NEXTOPARG();
JUMPBY(1);
{

View file

@ -2,7 +2,7 @@
// from Python/bytecodes.c
// Do not edit!
enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC0, INSTR_FMT_IBC000, INSTR_FMT_IBC0IB, INSTR_FMT_IBIB };
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC000, INSTR_FMT_IBCIB, INSTR_FMT_IBIB };
static const struct {
short n_popped;
short n_pushed;
@ -112,10 +112,10 @@ static const struct {
[STORE_ATTR_INSTANCE_VALUE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC000 },
[STORE_ATTR_WITH_HINT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC000 },
[STORE_ATTR_SLOT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC000 },
[COMPARE_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC0 },
[COMPARE_OP_FLOAT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC0IB },
[COMPARE_OP_INT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC0IB },
[COMPARE_OP_STR_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC0IB },
[COMPARE_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC },
[COMPARE_OP_FLOAT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBCIB },
[COMPARE_OP_INT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBCIB },
[COMPARE_OP_STR_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBCIB },
[IS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[CONTAINS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[CHECK_EG_MATCH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },

View file

@ -262,18 +262,27 @@ do { \
#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif
static int compare_masks[] = {
[Py_LT] = COMPARISON_LESS_THAN,
[Py_LE] = COMPARISON_LESS_THAN | COMPARISON_EQUALS,
[Py_EQ] = COMPARISON_EQUALS,
[Py_NE] = COMPARISON_NOT_EQUALS,
[Py_GT] = COMPARISON_GREATER_THAN,
[Py_GE] = COMPARISON_GREATER_THAN | COMPARISON_EQUALS,
};
// Initialize warmup counters and insert superinstructions. This cannot fail.
void
_PyCode_Quicken(PyCodeObject *code)
{
int previous_opcode = 0;
int opcode = 0;
_Py_CODEUNIT *instructions = _PyCode_CODE(code);
for (int i = 0; i < Py_SIZE(code); i++) {
int opcode = _PyOpcode_Deopt[_Py_OPCODE(instructions[i])];
int previous_opcode = opcode;
opcode = _PyOpcode_Deopt[_Py_OPCODE(instructions[i])];
int caches = _PyOpcode_Caches[opcode];
if (caches) {
instructions[i + 1].cache = adaptive_counter_warmup();
previous_opcode = 0;
i += caches;
continue;
}
@ -293,8 +302,19 @@ _PyCode_Quicken(PyCodeObject *code)
case STORE_FAST << 8 | STORE_FAST:
instructions[i - 1].opcode = STORE_FAST__STORE_FAST;
break;
case COMPARE_OP << 8 | POP_JUMP_IF_TRUE:
case COMPARE_OP << 8 | POP_JUMP_IF_FALSE:
{
int oparg = instructions[i - 1 - INLINE_CACHE_ENTRIES_COMPARE_OP].oparg;
assert((oparg >> 4) <= Py_GE);
int mask = compare_masks[oparg >> 4];
if (opcode == POP_JUMP_IF_FALSE) {
mask = mask ^ 0xf;
}
instructions[i - 1 - INLINE_CACHE_ENTRIES_COMPARE_OP].oparg = (oparg & 0xf0) | mask;
break;
}
}
previous_opcode = opcode;
}
}
@ -1977,20 +1997,6 @@ compare_op_fail_kind(PyObject *lhs, PyObject *rhs)
}
#endif
static int compare_masks[] = {
// 1-bit: jump if unordered
// 2-bit: jump if less
// 4-bit: jump if greater
// 8-bit: jump if equal
[Py_LT] = 0 | 2 | 0 | 0,
[Py_LE] = 0 | 2 | 0 | 8,
[Py_EQ] = 0 | 0 | 0 | 8,
[Py_NE] = 1 | 2 | 4 | 0,
[Py_GT] = 0 | 0 | 4 | 0,
[Py_GE] = 0 | 0 | 4 | 8,
};
void
_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg)
@ -2006,24 +2012,17 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_NOT_FOLLOWED_BY_COND_JUMP);
goto failure;
}
assert(oparg <= Py_GE);
int when_to_jump_mask = compare_masks[oparg];
if (next_opcode == POP_JUMP_IF_FALSE) {
when_to_jump_mask = (1 | 2 | 4 | 8) & ~when_to_jump_mask;
}
if (Py_TYPE(lhs) != Py_TYPE(rhs)) {
SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
goto failure;
}
if (PyFloat_CheckExact(lhs)) {
_py_set_opcode(instr, COMPARE_OP_FLOAT_JUMP);
cache->mask = when_to_jump_mask;
goto success;
}
if (PyLong_CheckExact(lhs)) {
if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) {
_py_set_opcode(instr, COMPARE_OP_INT_JUMP);
cache->mask = when_to_jump_mask;
goto success;
}
else {
@ -2032,13 +2031,13 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
}
}
if (PyUnicode_CheckExact(lhs)) {
if (oparg != Py_EQ && oparg != Py_NE) {
int cmp = oparg >> 4;
if (cmp != Py_EQ && cmp != Py_NE) {
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_COMPARE_OP_STRING);
goto failure;
}
else {
_py_set_opcode(instr, COMPARE_OP_STR_JUMP);
cache->mask = (when_to_jump_mask & 8) == 0;
goto success;
}
}