gh-116968: Reimplement Tier 2 counters (#117144)

Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
This commit is contained in:
Guido van Rossum 2024-04-04 08:03:27 -07:00 committed by GitHub
parent 63bbe77d9b
commit 060a96f1a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 313 additions and 235 deletions

View file

@ -1,6 +1,7 @@
#include "Python.h"
#include "opcode.h"
#include "pycore_interp.h"
#include "pycore_backoff.h"
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
@ -110,9 +111,7 @@ never_optimize(
_PyExecutorObject **exec,
int Py_UNUSED(stack_entries))
{
/* Although it should be benign for this to be called,
* it shouldn't happen, so fail in debug builds. */
assert(0 && "never optimize should never be called");
// This may be called if the optimizer is reset
return 0;
}
@ -127,25 +126,12 @@ PyTypeObject _PyDefaultOptimizer_Type = {
static _PyOptimizerObject _PyOptimizer_Default = {
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
.optimize = never_optimize,
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};
static uint32_t
shift_and_offset_threshold(uint32_t threshold)
{
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
}
_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(interp->optimizer_backedge_threshold ==
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
assert(interp->optimizer_resume_threshold ==
shift_and_offset_threshold(interp->optimizer->resume_threshold));
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
@ -190,13 +176,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
}
Py_INCREF(optimizer);
interp->optimizer = optimizer;
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
interp->optimizer_side_threshold = optimizer->side_threshold;
if (optimizer == &_PyOptimizer_Default) {
assert(interp->optimizer_backedge_threshold > (1 << 16));
assert(interp->optimizer_resume_threshold > (1 << 16));
}
return old;
}
@ -1109,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
assert(exit_count < COLD_EXIT_COUNT);
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = 0;
executor->exits[i].temperature = initial_temperature_backoff_counter();
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@ -1291,11 +1270,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return NULL;
}
opt->optimize = uop_optimize;
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
// Need a few iterations to settle specializations,
// and to ammortize the cost of optimization.
opt->side_threshold = 16;
opt->backedge_threshold = 16;
return (PyObject *)opt;
}
@ -1385,9 +1359,6 @@ PyUnstable_Optimizer_NewCounter(void)
return NULL;
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
}
@ -1554,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_DECREF(executor->exits[i].executor);
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = INT16_MIN;
executor->exits[i].temperature = initial_unreachable_backoff_counter();
}
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
assert(instruction->op.code == ENTER_EXECUTOR);