gh-116968: Reimplement Tier 2 counters (#117144)

Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
This commit is contained in:
Guido van Rossum 2024-04-04 08:03:27 -07:00 committed by GitHub
parent 63bbe77d9b
commit 060a96f1a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 313 additions and 235 deletions

View file

@ -115,13 +115,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
@ -432,13 +432,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _BINARY_SUBSCR
@ -760,13 +760,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
DISPATCH_SAME_OPARG();
}
STAT_INC(CALL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@ -2036,13 +2036,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(COMPARE_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _COMPARE_OP
@ -2185,13 +2185,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ContainsOp(right, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(CONTAINS_OP, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _CONTAINS_OP
@ -2596,13 +2596,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _FOR_ITER
@ -3026,7 +3026,7 @@
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, function, arg);
if (err) goto error;
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL);
}
@ -3142,7 +3142,7 @@
if (next_opcode < 0) goto error;
next_instr = this_instr;
if (_PyOpcode_Caches[next_opcode]) {
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
}
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
@ -3177,7 +3177,7 @@
/* Skip 1 cache entry */
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
}
@ -3415,16 +3415,8 @@
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
uint16_t counter = this_instr[1].cache;
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
* do the 2s complement adjustment manually */
uint32_t offset_counter = counter ^ (1 << 15);
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
// Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
// Double-check that the opcode isn't instrumented or something:
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
_Py_BackoffCounter counter = this_instr[1].counter;
if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
@ -3440,17 +3432,12 @@
GOTO_TIER_TWO(executor);
}
else {
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
backoff++;
if (backoff < MIN_TIER2_BACKOFF) {
backoff = MIN_TIER2_BACKOFF;
}
else if (backoff > MAX_TIER2_BACKOFF) {
backoff = MAX_TIER2_BACKOFF;
}
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
this_instr[1].counter = restart_backoff_counter(counter);
}
}
else {
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
#endif /* ENABLE_SPECIALIZATION */
DISPATCH();
}
@ -3543,14 +3530,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 8 cache entries */
@ -4238,14 +4225,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_GLOBAL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 1 cache entry */
@ -4442,13 +4429,13 @@
(void)counter;
#if ENABLE_SPECIALIZATION
int load_method = oparg & 1;
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_SUPER_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _LOAD_SUPER_ATTR
@ -5083,13 +5070,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _SEND
@ -5271,14 +5258,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
next_instr = this_instr;
_Py_Specialize_StoreAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_ATTR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 3 cache entries */
@ -5562,13 +5549,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_StoreSubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_SUBSCR, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _STORE_SUBSCR
@ -5665,13 +5652,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(TO_BOOL, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@ -5882,13 +5869,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(UNPACK_SEQUENCE, deferred);
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
(void)seq;
(void)counter;