mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
bpo-44187: Quickening infrastructure (GH-26264)
* Add co_firstinstr field to code object. * Implement barebones quickening. * Use non-quickened bytecode when tracing. * Add NEWS item * Add new file to Windows build. * Don't specialize instructions with EXTENDED_ARG.
This commit is contained in:
parent
89e50ab36f
commit
001eb520b5
12 changed files with 416 additions and 12 deletions
|
@ -4,6 +4,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Legacy Opcache */
|
||||
|
||||
typedef struct {
|
||||
PyObject *ptr; /* Cached pointer (borrowed reference) */
|
||||
|
@ -26,6 +27,129 @@ struct _PyOpcache {
|
|||
};
|
||||
|
||||
|
||||
/* PEP 659
|
||||
* Specialization and quickening structs and helper functions
|
||||
*/
|
||||
|
||||
/* Entry zero of an instruction's cache region: records how many cache
   entries (including this one) belong to the instruction.  Must stay
   exactly 8 bytes -- see the SpecializedCacheEntry invariant below. */
typedef struct {
    int32_t cache_count;
    int32_t _; /* Force 8 byte size */
} _PyEntryZero;
|
||||
|
||||
/* Cache entry for an adaptive (specializable) instruction. */
typedef struct {
    uint8_t original_oparg;  /* The instruction's oparg before quickening */
    uint8_t counter;         /* NOTE(review): presumably counts attempts/hits for
                                (re)specialization -- confirm against specialize.c */
    uint16_t index;          /* NOTE(review): meaning not visible in this header --
                                confirm against the specializing interpreter code */
} _PyAdaptiveEntry;
|
||||
|
||||
/* Add specialized versions of entries to this union.
 *
 * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
 * Preserving this invariant is necessary because:
 *  - If any one form uses more space, then all must and on 64 bit machines
 *    this is likely to double the memory consumption of caches
 *  - The function for calculating the offset of caches assumes a 4:1
 *    cache:instruction size ratio. Changing that would need careful
 *    analysis to choose a new function.
 */
typedef union {
    _PyEntryZero zero;        /* Entry [0]: per-instruction cache-count header */
    _PyAdaptiveEntry adaptive; /* Default payload for adaptive instructions */
} SpecializedCacheEntry;
|
||||
|
||||
/* How many code units occupy the same space as one cache entry.
   NOTE(review): the 4:1 ratio mentioned above implies sizeof(_Py_CODEUNIT) == 2;
   _Py_CODEUNIT's definition is not visible in this chunk -- confirm. */
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000
|
||||
|
||||
/* A unit of quickened-code memory, viewable either as bytecode or as a
   cache entry.  The cache region sits immediately before the instructions,
   so the same buffer is addressed through both arms of this union. */
typedef union _cache_or_instruction {
    _Py_CODEUNIT code[1];        /* Instruction view */
    SpecializedCacheEntry entry; /* Cache-entry view */
} SpecializedCacheOrInstruction;
|
||||
|
||||
/* Get pointer to the nth cache entry, from the first instruction and n.
|
||||
* Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
|
||||
* The zeroth entry immediately precedes the instructions.
|
||||
*/
|
||||
static inline SpecializedCacheEntry *
|
||||
_GetSpecializedCacheEntry(_Py_CODEUNIT *first_instr, Py_ssize_t n)
|
||||
{
|
||||
SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
|
||||
assert(&last_cache_plus_one->code[0] == first_instr);
|
||||
return &last_cache_plus_one[-1-n].entry;
|
||||
}
|
||||
|
||||
/* The following two functions form a pair.
 *
 * oparg_from_offset_and_nexti() is used to compute the oparg
 * when quickening, so that offset_from_oparg_and_nexti()
 * can be used at runtime to compute the offset.
 *
 * The relationship between the three values is currently
 *     offset == (index>>1) + oparg
 * This relation is chosen based on the following observations:
 * 1. Typically, 1 in 4 instructions needs a cache.
 * 2. Instructions that need a cache typically use 2 entries.
 * These observations imply: offset ≈ index/2
 * We use the oparg to fine-tune the relation, to avoid wasting space
 * and to allow consecutive instructions to use caches.
 *
 * If the number of cache entries < number of instructions/2, we will waste
 * some small amount of space.
 * If the number of cache entries > (number of instructions/2) + 255, then
 * some instructions will not be able to use a cache.
 * In practice, we expect a small amount of wasted space in shorter functions,
 * and only functions exceeding 1000 lines or so not to have enough cache space.
 *
 */
|
||||
/* Compute the oparg to store when quickening, from the desired cache
   offset and the next-instruction index.  Inverse of
   offset_from_oparg_and_nexti(): offset == (nexti>>1) + oparg. */
static inline int
oparg_from_offset_and_nexti(int offset, int nexti)
{
    int baseline = nexti >> 1;   /* ~index/2: the expected cache offset */
    return offset - baseline;    /* oparg is the correction on top of it */
}
|
||||
|
||||
/* Compute, at runtime, the cache offset for an instruction from its
   (quickened) oparg and the next-instruction index.
   Pairs with oparg_from_offset_and_nexti(). */
static inline int
offset_from_oparg_and_nexti(int oparg, int nexti)
{
    int baseline = nexti >> 1;   /* expected offset, ~index/2 */
    return baseline + oparg;     /* oparg fine-tunes the estimate */
}
|
||||
|
||||
/* Get pointer to the cache entry associated with an instruction.
|
||||
* nexti is the index of the instruction plus one.
|
||||
* nexti is used as it corresponds to the instruction pointer in the interpreter.
|
||||
* This doesn't check that an entry has been allocated for that instruction. */
|
||||
static inline SpecializedCacheEntry *
|
||||
_GetSpecializedCacheEntryForInstruction(_Py_CODEUNIT *first_instr, int nexti, int oparg)
|
||||
{
|
||||
return _GetSpecializedCacheEntry(
|
||||
first_instr,
|
||||
offset_from_oparg_and_nexti(oparg, nexti)
|
||||
);
|
||||
}
|
||||
|
||||
/* Number of warm-up increments before a code object is considered warmed
   up (see PyCodeObject_IncrementWarmup/PyCodeObject_IsWarmedUp below). */
#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)
/* NOTE(review): no user of QUICKENING_WARMUP_COLDEST is visible in this
   chunk -- confirm its intended meaning at the call sites. */
#define QUICKENING_WARMUP_COLDEST 1
|
||||
|
||||
static inline void
|
||||
PyCodeObject_IncrementWarmup(PyCodeObject * co)
|
||||
{
|
||||
co->co_warmup++;
|
||||
}
|
||||
|
||||
/* Used by the interpreter to determine when a code object should be quickened */
|
||||
static inline int
|
||||
PyCodeObject_IsWarmedUp(PyCodeObject * co)
|
||||
{
|
||||
return (co->co_warmup == 0);
|
||||
}
|
||||
|
||||
/* Quicken the bytecode of a code object.
   NOTE(review): the return-value convention is not visible in this header --
   presumably 0 on success and -1 on failure; confirm in the implementation. */
int _Py_Quicken(PyCodeObject *code);

/* Running count of code objects that have been quickened;
   defined in the quickening implementation file. */
extern Py_ssize_t _Py_QuickenedCount;
|
||||
|
||||
struct _PyCodeConstructor {
|
||||
/* metadata */
|
||||
PyObject *filename;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue