gh-112320: Implement on-trace confidence tracking for branches (#112321)

We track the confidence as a scaled int.
2025-07-17 00:05:20 +00:00 · 2023-12-12 13:43:08 -08:00 · 2023-12-12 13:43:08 -08:00 · 7316dfb0eb
commit 7316dfb0eb
parent dfaa9e060b
6 changed files with 56 additions and 3 deletions
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@ -114,6 +114,7 @@ typedef struct _optimization_stats {
    uint64_t trace_too_short;
    uint64_t inner_loop;
    uint64_t recursive_call;
    uint64_t low_confidence;
    UOpStats opcode[512];
    uint64_t unsupported_opcode[256];
    uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@ -2985,6 +2985,37 @@ class TestUops(unittest.TestCase):
        uops = {opname for opname, _, _ in ex}
        self.assertIn("_FOR_ITER_TIER_TWO", uops)
    def test_confidence_score(self):
        # Checks that the executor created for a loop body made of several
        # consecutive 'if' branches contains exactly one _GUARD_IS_TRUE_POP
        # uop, i.e. that the trace stops before a second branch guard.
        def testfunc(n):
            # Counts how many of the six low bits are set across range(n).
            # The six back-to-back 'if' statements are the point of the test;
            # do not restructure them, since the assertion below depends on
            # the branches this function compiles to.
            bits = 0
            for i in range(n):
                if i & 0x01:
                    bits += 1
                if i & 0x02:
                    bits += 1
                if i&0x04:
                    bits += 1
                if i&0x08:
                    bits += 1
                if i&0x10:
                    bits += 1
                if i&0x20:
                    bits += 1
            return bits
        opt = _testinternalcapi.get_uop_optimizer()
        # NOTE(review): temporary_optimizer presumably installs `opt` only for
        # the duration of the 'with' block -- confirm against its definition.
        with temporary_optimizer(opt):
            x = testfunc(20)
        # Sanity check of the result: the numbers 0..19 have 40 set bits in
        # total (32 for 0..15, plus 1 + 2 + 2 + 3 for 16..19).
        self.assertEqual(x, 40)
        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        # Iterating the executor yields 3-tuples; only the first item (the
        # uop name) is needed here.
        ops = [opname for opname, _, _ in ex]
        count = ops.count("_GUARD_IS_TRUE_POP")
        # Because each 'if' halves the score, the second branch is
        # too much already.
        self.assertEqual(count, 1)
@unittest.skipUnless(support.Py_GIL_DISABLED, 'need Py_GIL_DISABLED')
 class TestPyThreadId(unittest.TestCase):
--- a/Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst
+++ b/Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst
@ -0,0 +1,4 @@
 The Tier 2 translator now tracks the confidence level for staying "on trace"
 (i.e. not exiting back to the Tier 1 interpreter) for branch instructions
 based on the number of bits set in the branch "counter". Trace translation
 ends when the confidence drops below 1/3.
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@ -409,6 +409,9 @@ BRANCH_TO_GUARD[4][2] = {
 #define TRACE_STACK_SIZE 5
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333
 /* Returns 1 on success,
 * 0 if it failed to produce a worthwhile trace,
 * and -1 on an error.
@ -431,6 +434,7 @@ translate_bytecode_to_trace(
        _Py_CODEUNIT *instr;
    } trace_stack[TRACE_STACK_SIZE];
    int trace_stack_depth = 0;
    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
 #ifdef Py_DEBUG
    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
@ -513,7 +517,6 @@ top:  // Jump here after _PUSH_FRAME or likely branches
        uint32_t oparg = instr->op.arg;
        uint32_t extras = 0;
        if (opcode == EXTENDED_ARG) {
            instr++;
            extras += 1;
@ -543,11 +546,22 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                int counter = instr[1].cache;
                int bitcount = _Py_popcount32(counter);
                int jump_likely = bitcount > 8;
                if (jump_likely) {
                    confidence = confidence * bitcount / 16;
                }
                else {
                    confidence = confidence * (16 - bitcount) / 16;
                }
                if (confidence < CONFIDENCE_CUTOFF) {
                    DPRINTF(2, "Confidence too low (%d)\n", confidence);
                    OPT_STAT_INC(low_confidence);
                    goto done;
                }
                uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
                _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
-                DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n",
+                DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
                        _PyUOpName(opcode), oparg,
-                        counter, bitcount, jump_likely, _PyUOpName(uopcode));
+                        counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
                ADD_TO_TRACE(uopcode, max_length, 0, target);
                if (jump_likely) {
                    _Py_CODEUNIT *target_instr = next_instr + oparg;
--- a/Python/specialize.c
+++ b/Python/specialize.c
@ -233,6 +233,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
    print_histogram(out, "Trace length", stats->trace_length_hist);
    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@ -386,6 +386,7 @@ class Stats:
        trace_too_short = self._data["Optimization trace too short"]
        inner_loop = self._data["Optimization inner loop"]
        recursive_call = self._data["Optimization recursive call"]
        low_confidence = self._data["Optimization low confidence"]
        return {
            "Optimization attempts": (attempts, None),
@ -396,6 +397,7 @@ class Stats:
            "Trace too short": (trace_too_short, attempts),
            "Inner loop found": (inner_loop, attempts),
            "Recursive call": (recursive_call, attempts),
            "Low confidence": (low_confidence, attempts),
            "Traces executed": (executed, None),
            "Uops executed": (uops, executed),
        }