bpo-46841: Use *inline* caching for BINARY_OP (GH-31543)

Brandt Bucher authored on 2022-02-25 04:11:34 -08:00; committed by GitHub
parent 18b5dd68c6
commit 0f41aac109
19 changed files with 429 additions and 351 deletions


@@ -65,7 +65,6 @@ static uint8_t cache_requirements[256] = {
     [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [STORE_ATTR] = 1, // _PyAdaptiveEntry
-    [BINARY_OP] = 1, // _PyAdaptiveEntry
     [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
     [UNPACK_SEQUENCE] = 1, // _PyAdaptiveEntry
 };
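BINARY_OP drops out of cache_requirements because its cache no longer lives in the out-of-line SpecializedCacheEntry array; it now occupies the code units immediately after the instruction itself. A minimal, self-contained model of that layout (the types, values, and names here are illustrative, not CPython's actual definitions):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint16_t codeunit;                        /* stands in for _Py_CODEUNIT */
    #define MAKEUNIT(op, arg) ((codeunit)(((op) << 8) | ((arg) & 0xFF)))
    #define OPCODE(u) ((u) >> 8)
    #define OPARG(u)  ((u) & 0xFF)

    typedef struct { codeunit counter; } binop_cache; /* stands in for _PyBinaryOpCache */

    int main(void)
    {
        enum { BINARY_OP = 122, NB_ADD = 0 };         /* numbers are arbitrary here */
        codeunit code[2];
        code[0] = MAKEUNIT(BINARY_OP, NB_ADD);        /* the instruction */
        code[1] = 0;                                  /* its inline cache entry */
        /* Reach the cache the same way the diff does: cast past the instruction. */
        binop_cache *cache = (binop_cache *)(&code[0] + 1);
        cache->counter = 53;
        printf("opcode=%u oparg=%u counter=%u\n",
               (unsigned)OPCODE(code[0]), (unsigned)OPARG(code[0]),
               (unsigned)cache->counter);
        return 0;
    }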
@@ -385,29 +384,34 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
         int opcode = _Py_OPCODE(instructions[i]);
         int oparg = _Py_OPARG(instructions[i]);
         uint8_t adaptive_opcode = adaptive_opcodes[opcode];
-        if (adaptive_opcode && previous_opcode != EXTENDED_ARG) {
-            int new_oparg = oparg_from_instruction_and_update_offset(
-                i, opcode, oparg, &cache_offset
-            );
-            if (new_oparg < 0) {
-                /* Not possible to allocate a cache for this instruction */
-                previous_opcode = opcode;
-                continue;
+        if (adaptive_opcode) {
+            if (_PyOpcode_InlineCacheEntries[opcode]) {
+                instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
             }
-            previous_opcode = adaptive_opcode;
-            int entries_needed = cache_requirements[opcode];
-            if (entries_needed) {
-                /* Initialize the adpative cache entry */
-                int cache0_offset = cache_offset-entries_needed;
-                SpecializedCacheEntry *cache =
-                    _GetSpecializedCacheEntry(instructions, cache0_offset);
-                cache->adaptive.original_oparg = oparg;
-                cache->adaptive.counter = 0;
-            } else {
-                // oparg is the adaptive cache counter
-                new_oparg = 0;
+            else if (previous_opcode != EXTENDED_ARG) {
+                int new_oparg = oparg_from_instruction_and_update_offset(
+                    i, opcode, oparg, &cache_offset
+                );
+                if (new_oparg < 0) {
+                    /* Not possible to allocate a cache for this instruction */
+                    previous_opcode = opcode;
+                    continue;
+                }
+                previous_opcode = adaptive_opcode;
+                int entries_needed = cache_requirements[opcode];
+                if (entries_needed) {
+                    /* Initialize the adpative cache entry */
+                    int cache0_offset = cache_offset-entries_needed;
+                    SpecializedCacheEntry *cache =
+                        _GetSpecializedCacheEntry(instructions, cache0_offset);
+                    cache->adaptive.original_oparg = oparg;
+                    cache->adaptive.counter = 0;
+                } else {
+                    // oparg is the adaptive cache counter
+                    new_oparg = 0;
+                }
+                instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, new_oparg);
             }
-            instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, new_oparg);
         }
         else {
             /* Super instructions don't use the cache,
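With inline caches the quickening pass gets simpler for the converted opcodes: nothing is allocated out of line, and the oparg survives the rewrite because the counter no longer has to hide in it. A compilable sketch of just that new first branch (the legacy EXTENDED_ARG path is omitted), with hypothetical tables standing in for adaptive_opcodes[] and _PyOpcode_InlineCacheEntries[]:

    #include <stdint.h>

    typedef uint16_t codeunit;
    #define MAKEUNIT(op, arg) ((codeunit)(((op) << 8) | ((arg) & 0xFF)))
    #define OPCODE(u) ((u) >> 8)
    #define OPARG(u)  ((u) & 0xFF)

    static uint8_t adaptive_opcodes[256];     /* opcode -> adaptive form, 0 if none */
    static uint8_t inline_cache_entries[256]; /* opcode -> trailing cache units */

    static void
    quicken(codeunit *code, int len)
    {
        for (int i = 0; i < len; i++) {
            int opcode = OPCODE(code[i]);
            if (adaptive_opcodes[opcode] && inline_cache_entries[opcode]) {
                /* Keep the oparg; only the opcode changes. The cache units
                 * after the instruction are left for the interpreter to use. */
                code[i] = MAKEUNIT(adaptive_opcodes[opcode], OPARG(code[i]));
            }
        }
    }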
@@ -1922,10 +1926,12 @@ binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs)
 
 void
 _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
-                        SpecializedCacheEntry *cache)
+                        int oparg)
 {
-    _PyAdaptiveEntry *adaptive = &cache->adaptive;
-    switch (adaptive->original_oparg) {
+    assert(_PyOpcode_InlineCacheEntries[BINARY_OP] ==
+           INLINE_CACHE_ENTRIES_BINARY_OP);
+    _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1);
+    switch (oparg) {
     case NB_ADD:
     case NB_INPLACE_ADD:
         if (!Py_IS_TYPE(lhs, Py_TYPE(rhs))) {
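The new signature drops the SpecializedCacheEntry parameter entirely: the caller passes the oparg it already has, and the function finds its cache at instr + 1. The assert pins down the invariant that makes that cast safe. A self-contained model of the invariant, assuming (as the assert suggests) that the entry count is derived from the cache struct's size:

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t codeunit;
    typedef struct { codeunit counter; } binop_cache;

    /* Derive the entry count from the struct, the way the asserted
     * INLINE_CACHE_ENTRIES_BINARY_OP constant presumably is: */
    enum { CACHE_ENTRIES_BINARY_OP = sizeof(binop_cache) / sizeof(codeunit) };

    static void
    specialize_binary_op(codeunit *instr, int oparg)
    {
        /* Safe only if the compiler really reserved this many units: */
        assert(CACHE_ENTRIES_BINARY_OP == 1);
        binop_cache *cache = (binop_cache *)(instr + 1);
        cache->counter = 0;
        (void)oparg;
    }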
@@ -1934,20 +1940,18 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
         if (PyUnicode_CheckExact(lhs)) {
             if (_Py_OPCODE(instr[1]) == STORE_FAST && Py_REFCNT(lhs) == 2) {
                 *instr = _Py_MAKECODEUNIT(BINARY_OP_INPLACE_ADD_UNICODE,
-                                          _Py_OPARG(*instr));
+                                          oparg);
                 goto success;
             }
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_UNICODE,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_UNICODE, oparg);
             goto success;
         }
         if (PyLong_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_INT, _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_INT, oparg);
             goto success;
         }
         if (PyFloat_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_FLOAT,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_FLOAT, oparg);
             goto success;
         }
         break;
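The in-place unicode case is the subtle one: the specializer peeks ahead for a STORE_FAST and requires Py_REFCNT(lhs) == 2 (one reference from the local that STORE_FAST is about to overwrite, one from the evaluation stack), so after the store the result would be the sole owner of the string and its buffer can be grown in place rather than copied. A toy model of why sole ownership permits that (illustrative only, with error handling omitted; CPython's unicode objects are far more involved):

    #include <stdlib.h>
    #include <string.h>

    typedef struct { long refcnt; size_t len; char *buf; } toystr;

    /* Append rhs to lhs. With a sole owner we may grow the buffer in
     * place (amortized O(1) per append); otherwise we must copy. */
    static toystr *
    concat(toystr *lhs, const char *rhs, size_t rlen)
    {
        if (lhs->refcnt == 1) {                    /* sole owner: mutate */
            lhs->buf = realloc(lhs->buf, lhs->len + rlen);
            memcpy(lhs->buf + lhs->len, rhs, rlen);
            lhs->len += rlen;
            return lhs;
        }
        toystr *out = malloc(sizeof *out);         /* shared: copy */
        out->refcnt = 1;
        out->len = lhs->len + rlen;
        out->buf = malloc(out->len);
        memcpy(out->buf, lhs->buf, lhs->len);
        memcpy(out->buf + lhs->len, rhs, rlen);
        return out;
    }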
@@ -1957,13 +1961,11 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
             break;
         }
         if (PyLong_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_INT,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_INT, oparg);
             goto success;
         }
         if (PyFloat_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_FLOAT,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_FLOAT, oparg);
             goto success;
         }
         break;
@@ -1973,13 +1975,11 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
             break;
         }
         if (PyLong_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_INT,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_INT, oparg);
             goto success;
         }
         if (PyFloat_CheckExact(lhs)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_FLOAT,
-                                      _Py_OPARG(*instr));
+            *instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_FLOAT, oparg);
             goto success;
         }
         break;
@@ -1990,18 +1990,17 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
         // back to BINARY_OP (unless we're collecting stats, where it's more
         // important to get accurate hit counts for the unadaptive version
         // and each of the different failure types):
-        *instr = _Py_MAKECODEUNIT(BINARY_OP, adaptive->original_oparg);
+        *instr = _Py_MAKECODEUNIT(BINARY_OP, oparg);
         return;
 #endif
     }
-    SPECIALIZATION_FAIL(
-        BINARY_OP, binary_op_fail_kind(adaptive->original_oparg, lhs, rhs));
+    SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
     STAT_INC(BINARY_OP, failure);
-    cache_backoff(adaptive);
+    cache->counter = ADAPTIVE_CACHE_BACKOFF;
     return;
 success:
     STAT_INC(BINARY_OP, success);
-    adaptive->counter = initial_counter_value();
+    cache->counter = initial_counter_value();
 }
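The tail shows the counter's whole lifecycle now running through the inline cache: a failed attempt parks the instruction for ADAPTIVE_CACHE_BACKOFF executions before the next try, while a success resets the counter so the specialized form gets a fresh budget. A small self-contained simulation of that policy (the constants are illustrative, not CPython's tuned values):

    #include <stdint.h>

    enum { BACKOFF = 64 };                       /* illustrative constant */
    static uint16_t initial_counter_value(void) { return 53; }  /* illustrative */

    typedef struct { uint16_t counter; } binop_cache;

    /* One execution of the adaptive instruction: only attempt to
     * specialize when the countdown reaches zero. */
    static void
    adaptive_step(binop_cache *cache, int specialization_succeeds)
    {
        if (cache->counter > 0) {
            cache->counter--;                    /* generic path, no attempt */
            return;
        }
        if (specialization_succeeds) {
            cache->counter = initial_counter_value();  /* fresh budget */
        }
        else {
            cache->counter = BACKOFF;            /* don't retry immediately */
        }
    }

The backoff keeps code that never specializes from paying the attempt cost on every execution, while successful sites re-validate on their own schedule.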