mirror of
https://github.com/python/cpython.git
synced 2025-08-07 18:38:38 +00:00
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``), shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The API used for adaptive specialization counters is changed but the behavior is (supposed to be) identical. The behavior of the Tier 2 counters is changed: - There are no longer dynamic thresholds (we never varied these). - All counters now use the same exponential backoff. - The counter for ``JUMP_BACKWARD`` starts counting down from 16. - The ``temperature`` in side exits starts counting down from 64.
This commit is contained in:
parent
63bbe77d9b
commit
060a96f1a9
19 changed files with 313 additions and 235 deletions
128
Include/internal/pycore_backoff.h
Normal file
128
Include/internal/pycore_backoff.h
Normal file
|
@ -0,0 +1,128 @@
|
|||
|
||||
#ifndef Py_INTERNAL_BACKOFF_H
|
||||
#define Py_INTERNAL_BACKOFF_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* 16-bit countdown counters using exponential backoff.
|
||||
|
||||
These are used by the adaptive specializer to count down until
|
||||
it is time to specialize an instruction. If specialization fails
|
||||
the counter is reset using exponential backoff.
|
||||
|
||||
Another use is for the Tier 2 optimizer to decide when to create
|
||||
a new Tier 2 trace (executor). Again, exponential backoff is used.
|
||||
|
||||
The 16-bit counter is structured as a 12-bit unsigned 'value'
|
||||
and a 4-bit 'backoff' field. When resetting the counter, the
|
||||
backoff field is incremented (until it reaches a limit) and the
|
||||
value is set to a bit mask representing the value 2**backoff - 1.
|
||||
The maximum backoff is 12 (the number of value bits).
|
||||
|
||||
The bit pattern 0xFFFF (UNREACHABLE_BACKOFF) is reserved: a counter holding it must never be updated.
|
||||
*/
|
||||
|
||||
#define UNREACHABLE_BACKOFF 0xFFFF
|
||||
|
||||
static inline bool
|
||||
is_unreachable_backoff_counter(_Py_BackoffCounter counter)
|
||||
{
|
||||
return counter.as_counter == UNREACHABLE_BACKOFF;
|
||||
}
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
make_backoff_counter(uint16_t value, uint16_t backoff)
|
||||
{
|
||||
assert(backoff <= 15);
|
||||
assert(value <= 0xFFF);
|
||||
return (_Py_BackoffCounter){.value = value, .backoff = backoff};
|
||||
}
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
forge_backoff_counter(uint16_t counter)
|
||||
{
|
||||
return (_Py_BackoffCounter){.as_counter = counter};
|
||||
}
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
restart_backoff_counter(_Py_BackoffCounter counter)
|
||||
{
|
||||
assert(!is_unreachable_backoff_counter(counter));
|
||||
if (counter.backoff < 12) {
|
||||
return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1);
|
||||
}
|
||||
else {
|
||||
return make_backoff_counter((1 << 12) - 1, 12);
|
||||
}
|
||||
}
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
pause_backoff_counter(_Py_BackoffCounter counter)
|
||||
{
|
||||
return make_backoff_counter(counter.value | 1, counter.backoff);
|
||||
}
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
advance_backoff_counter(_Py_BackoffCounter counter)
|
||||
{
|
||||
if (!is_unreachable_backoff_counter(counter)) {
|
||||
return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff);
|
||||
}
|
||||
else {
|
||||
return counter;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
backoff_counter_triggers(_Py_BackoffCounter counter)
|
||||
{
|
||||
return counter.value == 0;
|
||||
}
|
||||
|
||||
/* Initial JUMP_BACKWARD counter.
|
||||
* This determines when we create a trace for a loop.
|
||||
* Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
|
||||
#define JUMP_BACKWARD_INITIAL_VALUE 16
|
||||
#define JUMP_BACKWARD_INITIAL_BACKOFF 4
|
||||
static inline _Py_BackoffCounter
|
||||
initial_jump_backoff_counter(void)
|
||||
{
|
||||
return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
|
||||
JUMP_BACKWARD_INITIAL_BACKOFF);
|
||||
}
|
||||
|
||||
/* Initial exit temperature.
|
||||
* Must be larger than ADAPTIVE_COOLDOWN_VALUE,
|
||||
* otherwise when a side exit warms up we may construct
|
||||
* a new trace before the Tier 1 code has properly re-specialized.
|
||||
* Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
|
||||
#define COLD_EXIT_INITIAL_VALUE 64
|
||||
#define COLD_EXIT_INITIAL_BACKOFF 6
|
||||
|
||||
static inline _Py_BackoffCounter
|
||||
initial_temperature_backoff_counter(void)
|
||||
{
|
||||
return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
|
||||
COLD_EXIT_INITIAL_BACKOFF);
|
||||
}
|
||||
|
||||
/* Unreachable backoff counter. */
|
||||
static inline _Py_BackoffCounter
|
||||
initial_unreachable_backoff_counter(void)
|
||||
{
|
||||
return forge_backoff_counter(UNREACHABLE_BACKOFF);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_INTERNAL_BACKOFF_H */
|
|
@ -31,7 +31,7 @@ extern "C" {
|
|||
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
uint16_t module_keys_version;
|
||||
uint16_t builtin_keys_version;
|
||||
uint16_t index;
|
||||
|
@ -40,44 +40,44 @@ typedef struct {
|
|||
#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyBinaryOpCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyUnpackSequenceCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
|
||||
CACHE_ENTRIES(_PyUnpackSequenceCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyCompareOpCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyBinarySubscrCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PySuperAttrCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
uint16_t version[2];
|
||||
uint16_t index;
|
||||
} _PyAttrCache;
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
uint16_t type_version[2];
|
||||
union {
|
||||
uint16_t keys_version[2];
|
||||
|
@ -93,39 +93,39 @@ typedef struct {
|
|||
#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
uint16_t func_version[2];
|
||||
} _PyCallCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyStoreSubscrCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyForIterCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PySendCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
uint16_t version[2];
|
||||
} _PyToBoolCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
|
||||
|
||||
typedef struct {
|
||||
uint16_t counter;
|
||||
_Py_BackoffCounter counter;
|
||||
} _PyContainsOpCache;
|
||||
|
||||
#define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
|
||||
|
@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
|
|||
|
||||
/** Counters
|
||||
* The first 16-bit value in each inline cache is a counter.
|
||||
* When counting misses, the counter is treated as a simple unsigned value.
|
||||
*
|
||||
* When counting executions until the next specialization attempt,
|
||||
* exponential backoff is used to reduce the number of specialization failures.
|
||||
* The high 12 bits store the counter, the low 4 bits store the backoff exponent.
|
||||
* On a specialization failure, the backoff exponent is incremented and the
|
||||
* counter set to (2**backoff - 1).
|
||||
* Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
|
||||
* See pycore_backoff.h for more details.
|
||||
* On a specialization failure, the backoff counter is restarted.
|
||||
*/
|
||||
|
||||
/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
|
||||
#define ADAPTIVE_BACKOFF_BITS 4
|
||||
#include "pycore_backoff.h"
|
||||
|
||||
// A value of 1 means that we attempt to specialize the *second* time each
|
||||
// instruction is executed. Executing twice is a much better indicator of
|
||||
|
@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
|
|||
#define ADAPTIVE_COOLDOWN_VALUE 52
|
||||
#define ADAPTIVE_COOLDOWN_BACKOFF 0
|
||||
|
||||
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
|
||||
// Can't assert this in pycore_backoff.h because of header order dependencies
|
||||
static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
|
||||
"Cold exit value should be larger than adaptive cooldown value");
|
||||
|
||||
|
||||
static inline uint16_t
|
||||
static inline _Py_BackoffCounter
|
||||
adaptive_counter_bits(uint16_t value, uint16_t backoff) {
|
||||
return ((value << ADAPTIVE_BACKOFF_BITS)
|
||||
| (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
|
||||
return make_backoff_counter(value, backoff);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
static inline _Py_BackoffCounter
|
||||
adaptive_counter_warmup(void) {
|
||||
return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
|
||||
ADAPTIVE_WARMUP_BACKOFF);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
static inline _Py_BackoffCounter
|
||||
adaptive_counter_cooldown(void) {
|
||||
return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
|
||||
ADAPTIVE_COOLDOWN_BACKOFF);
|
||||
}
|
||||
|
||||
static inline uint16_t
|
||||
adaptive_counter_backoff(uint16_t counter) {
|
||||
uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
|
||||
backoff++;
|
||||
if (backoff > MAX_BACKOFF_VALUE) {
|
||||
backoff = MAX_BACKOFF_VALUE;
|
||||
}
|
||||
uint16_t value = (uint16_t)(1 << backoff) - 1;
|
||||
return adaptive_counter_bits(value, backoff);
|
||||
static inline _Py_BackoffCounter
|
||||
adaptive_counter_backoff(_Py_BackoffCounter counter) {
|
||||
return restart_backoff_counter(counter);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -239,12 +239,6 @@ struct _is {
|
|||
_PyOptimizerObject *optimizer;
|
||||
_PyExecutorObject *executor_list_head;
|
||||
|
||||
/* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
|
||||
uint32_t optimizer_resume_threshold;
|
||||
uint32_t optimizer_backedge_threshold;
|
||||
|
||||
uint16_t optimizer_side_threshold;
|
||||
|
||||
_rare_events rare_events;
|
||||
PyDict_WatchCallback builtins_dict_watcher;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue