mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
gh-109039: Branch prediction for Tier 2 interpreter (#109038)
This adds a 16-bit inline cache entry to the conditional branch instructions POP_JUMP_IF_{FALSE,TRUE,NONE,NOT_NONE} and their instrumented variants, which is used to keep track of the branch direction. Each time we encounter these instructions we shift the cache entry left by one and set the bottom bit to whether we jumped. Then when it's time to translate such a branch to Tier 2 uops, we use the bit count from the cache entry to decide whether to continue translating the "didn't jump" branch or the "jumped" branch. The counter is initialized to a pattern of alternating ones and zeros to avoid bias. The .pyc file magic number is updated. There's a new test, some fixes for existing tests, and a few miscellaneous cleanups.
This commit is contained in:
parent
ecd21a629a
commit
bcce5e2718
15 changed files with 339 additions and 181 deletions
|
@ -2292,14 +2292,22 @@ dummy_func(
|
|||
goto resume_frame;
|
||||
}
|
||||
|
||||
inst(POP_JUMP_IF_FALSE, (cond -- )) {
|
||||
inst(POP_JUMP_IF_FALSE, (unused/1, cond -- )) {
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsFalse(cond));
|
||||
int flag = Py_IsFalse(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
inst(POP_JUMP_IF_TRUE, (cond -- )) {
|
||||
inst(POP_JUMP_IF_TRUE, (unused/1, cond -- )) {
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsTrue(cond));
|
||||
int flag = Py_IsTrue(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
op(IS_NONE, (value -- b)) {
|
||||
|
@ -3751,47 +3759,63 @@ dummy_func(
|
|||
INSTRUMENTED_JUMP(next_instr-1, next_instr+1-oparg, PY_MONITORING_EVENT_JUMP);
|
||||
}
|
||||
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_TRUE, ( -- )) {
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_TRUE, (unused/1 -- )) {
|
||||
PyObject *cond = POP();
|
||||
assert(PyBool_Check(cond));
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int offset = Py_IsTrue(cond) * oparg;
|
||||
int flag = Py_IsTrue(cond);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_FALSE, ( -- )) {
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_FALSE, (unused/1 -- )) {
|
||||
PyObject *cond = POP();
|
||||
assert(PyBool_Check(cond));
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int offset = Py_IsFalse(cond) * oparg;
|
||||
int flag = Py_IsFalse(cond);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_NONE, ( -- )) {
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_NONE, (unused/1 -- )) {
|
||||
PyObject *value = POP();
|
||||
_Py_CODEUNIT *here = next_instr-1;
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int flag = Py_IsNone(value);
|
||||
int offset;
|
||||
if (Py_IsNone(value)) {
|
||||
if (flag) {
|
||||
offset = oparg;
|
||||
}
|
||||
else {
|
||||
Py_DECREF(value);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_NOT_NONE, ( -- )) {
|
||||
inst(INSTRUMENTED_POP_JUMP_IF_NOT_NONE, (unused/1 -- )) {
|
||||
PyObject *value = POP();
|
||||
_Py_CODEUNIT *here = next_instr-1;
|
||||
int offset;
|
||||
if (Py_IsNone(value)) {
|
||||
int nflag = Py_IsNone(value);
|
||||
if (nflag) {
|
||||
offset = 0;
|
||||
}
|
||||
else {
|
||||
Py_DECREF(value);
|
||||
offset = oparg;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | !nflag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
|
58
Python/generated_cases.c.h
generated
58
Python/generated_cases.c.h
generated
|
@ -2996,8 +2996,13 @@
|
|||
PyObject *cond;
|
||||
cond = stack_pointer[-1];
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsFalse(cond));
|
||||
int flag = Py_IsFalse(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
STACK_SHRINK(1);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -3005,8 +3010,13 @@
|
|||
PyObject *cond;
|
||||
cond = stack_pointer[-1];
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsTrue(cond));
|
||||
int flag = Py_IsTrue(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
STACK_SHRINK(1);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -3029,9 +3039,14 @@
|
|||
cond = b;
|
||||
{
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsTrue(cond));
|
||||
int flag = Py_IsTrue(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
STACK_SHRINK(1);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -3054,9 +3069,14 @@
|
|||
cond = b;
|
||||
{
|
||||
assert(PyBool_Check(cond));
|
||||
JUMPBY(oparg * Py_IsFalse(cond));
|
||||
int flag = Py_IsFalse(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
STACK_SHRINK(1);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -4921,8 +4941,13 @@
|
|||
PyObject *cond = POP();
|
||||
assert(PyBool_Check(cond));
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int offset = Py_IsTrue(cond) * oparg;
|
||||
int flag = Py_IsTrue(cond);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -4930,23 +4955,33 @@
|
|||
PyObject *cond = POP();
|
||||
assert(PyBool_Check(cond));
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int offset = Py_IsFalse(cond) * oparg;
|
||||
int flag = Py_IsFalse(cond);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) {
|
||||
PyObject *value = POP();
|
||||
_Py_CODEUNIT *here = next_instr-1;
|
||||
_Py_CODEUNIT *here = next_instr - 1;
|
||||
int flag = Py_IsNone(value);
|
||||
int offset;
|
||||
if (Py_IsNone(value)) {
|
||||
if (flag) {
|
||||
offset = oparg;
|
||||
}
|
||||
else {
|
||||
Py_DECREF(value);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | flag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
@ -4954,14 +4989,19 @@
|
|||
PyObject *value = POP();
|
||||
_Py_CODEUNIT *here = next_instr-1;
|
||||
int offset;
|
||||
if (Py_IsNone(value)) {
|
||||
int nflag = Py_IsNone(value);
|
||||
if (nflag) {
|
||||
offset = 0;
|
||||
}
|
||||
else {
|
||||
Py_DECREF(value);
|
||||
offset = oparg;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
next_instr->cache = (next_instr->cache << 1) | !nflag;
|
||||
#endif
|
||||
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
next_instr += 1;
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "opcode_ids.h"
|
||||
|
||||
#include "pycore_bitutils.h" // _Py_popcount32
|
||||
#include "pycore_call.h"
|
||||
#include "pycore_code.h" // _PyCode_Clear_Executors()
|
||||
#include "pycore_frame.h"
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "Python.h"
|
||||
#include "opcode.h"
|
||||
#include "pycore_interp.h"
|
||||
#include "pycore_bitutils.h" // _Py_popcount32()
|
||||
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName()
|
||||
#include "pycore_opcode_utils.h" // MAX_REAL_OPCODE
|
||||
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
|
||||
|
@ -501,7 +502,7 @@ translate_bytecode_to_trace(
|
|||
code->co_firstlineno,
|
||||
2 * INSTR_IP(initial_instr, code));
|
||||
|
||||
top: // Jump here after _PUSH_FRAME
|
||||
top: // Jump here after _PUSH_FRAME or likely branches
|
||||
for (;;) {
|
||||
RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE
|
||||
ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
|
||||
|
@ -547,16 +548,29 @@ top: // Jump here after _PUSH_FRAME
|
|||
case POP_JUMP_IF_TRUE:
|
||||
{
|
||||
pop_jump_if_bool:
|
||||
// Assume jump unlikely (TODO: handle jump likely case)
|
||||
RESERVE(1, 2);
|
||||
_Py_CODEUNIT *target_instr =
|
||||
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
|
||||
max_length -= 2; // Really the start of the stubs
|
||||
uint32_t uopcode = opcode == POP_JUMP_IF_TRUE ?
|
||||
int counter = instr[1].cache;
|
||||
int bitcount = _Py_popcount32(counter);
|
||||
bool jump_likely = bitcount > 8;
|
||||
bool jump_sense = opcode == POP_JUMP_IF_TRUE;
|
||||
uint32_t uopcode = jump_sense ^ jump_likely ?
|
||||
_POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE;
|
||||
_Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
|
||||
_Py_CODEUNIT *target_instr = next_instr + oparg;
|
||||
_Py_CODEUNIT *stub_target = jump_likely ? next_instr : target_instr;
|
||||
DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, sense=%d, uopcode=%s\n",
|
||||
uop_name(opcode), oparg,
|
||||
counter, bitcount, jump_likely, jump_sense, uop_name(uopcode));
|
||||
ADD_TO_TRACE(uopcode, max_length, 0);
|
||||
ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(target_instr, code), 0);
|
||||
ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(stub_target, code), 0);
|
||||
ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0, 0);
|
||||
if (jump_likely) {
|
||||
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
|
||||
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
|
||||
instr = target_instr;
|
||||
goto top;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -927,6 +941,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
|
|||
opt->resume_threshold = UINT16_MAX;
|
||||
// Need at least 3 iterations to settle specializations.
|
||||
// A few lower bits of the counter are reserved for other flags.
|
||||
opt->backedge_threshold = 3 << OPTIMIZER_BITS_IN_COUNTER;
|
||||
opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
|
||||
return (PyObject *)opt;
|
||||
}
|
||||
|
|
|
@ -338,9 +338,23 @@ _PyCode_Quicken(PyCodeObject *code)
|
|||
assert(opcode < MIN_INSTRUMENTED_OPCODE);
|
||||
int caches = _PyOpcode_Caches[opcode];
|
||||
if (caches) {
|
||||
// JUMP_BACKWARD counter counts up from 0 until it is > backedge_threshold
|
||||
instructions[i + 1].cache =
|
||||
opcode == JUMP_BACKWARD ? 0 : adaptive_counter_warmup();
|
||||
// The initial value depends on the opcode
|
||||
int initial_value;
|
||||
switch (opcode) {
|
||||
case JUMP_BACKWARD:
|
||||
initial_value = 0;
|
||||
break;
|
||||
case POP_JUMP_IF_FALSE:
|
||||
case POP_JUMP_IF_TRUE:
|
||||
case POP_JUMP_IF_NONE:
|
||||
case POP_JUMP_IF_NOT_NONE:
|
||||
initial_value = 0x5555; // Alternating 0, 1 bits
|
||||
break;
|
||||
default:
|
||||
initial_value = adaptive_counter_warmup();
|
||||
break;
|
||||
}
|
||||
instructions[i + 1].cache = initial_value;
|
||||
i += caches;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue