bpo-44187: Quickening infrastructure (GH-26264)

* Add co_firstinstr field to code object.

* Implement barebones quickening.

* Use non-quickened bytecode when tracing.

* Add NEWS item.

* Add new file to Windows build.

* Don't specialize instructions with EXTENDED_ARG.
Mark Shannon, 2021-06-07 18:38:06 +01:00 (committed by GitHub)
parent 89e50ab36f
commit 001eb520b5
12 changed files with 416 additions and 12 deletions

Include/cpython/code.h

@@ -7,9 +7,11 @@ typedef uint16_t _Py_CODEUNIT;
 #ifdef WORDS_BIGENDIAN
 # define _Py_OPCODE(word) ((word) >> 8)
 # define _Py_OPARG(word) ((word) & 255)
+# define _Py_MAKECODEUNIT(opcode, oparg) (((opcode)<<8)|(oparg))
 #else
 # define _Py_OPCODE(word) ((word) & 255)
 # define _Py_OPARG(word) ((word) >> 8)
+# define _Py_MAKECODEUNIT(opcode, oparg) ((opcode)|((oparg)<<8))
 #endif
 typedef struct _PyOpcache _PyOpcache;
@@ -43,16 +45,20 @@ struct PyCodeObject {
     /* These fields are set with provided values on new code objects. */
     // The hottest fields (in the eval loop) are grouped here at the top.
+    PyObject *co_code;          /* instruction opcodes */
     PyObject *co_consts;        /* list (constants used) */
     PyObject *co_names;         /* list of strings (names used) */
+    _Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening */
+    PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
     int co_flags;               /* CO_..., see below */
+    int co_warmup;              /* Warmup counter for quickening */
     // The rest are not so impactful on performance.
     int co_argcount;            /* #arguments, except *args */
     int co_posonlyargcount;     /* #positional only arguments */
     int co_kwonlyargcount;      /* #keyword only arguments */
     int co_stacksize;           /* #entries needed for evaluation stack */
     int co_firstlineno;         /* first source line number */
-    PyObject *co_code;          /* instruction opcodes */
     PyObject *co_varnames;      /* tuple of strings (local variable names) */
     PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
     PyObject *co_freevars;      /* tuple of strings (free variable names) */
@@ -60,7 +66,6 @@ struct PyCodeObject {
     PyObject *co_name;          /* unicode (name, for reference) */
     PyObject *co_linetable;     /* string (encoding addr<->lineno mapping) See
                                    Objects/lnotab_notes.txt for details. */
-    PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
     /* These fields are set with computed values on new code objects. */
@@ -78,6 +83,10 @@ struct PyCodeObject {
        Type is a void* to keep the format private in codeobject.c to force
        people to go through the proper APIs. */
     void *co_extra;
+    /* Quickened instructions and cache, or NULL
+       This should be treated as opaque by all code except the specializer and
+       interpreter. */
+    union _cache_or_instruction *co_quickened;
     /* Per opcodes just-in-time cache
      *
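The two _Py_MAKECODEUNIT variants added above are exact inverses of the existing _Py_OPCODE/_Py_OPARG accessors on their respective endiannesses. A standalone sanity check of the round-trip property (an illustrative sketch mirroring the header's macros, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t _Py_CODEUNIT;
    #ifdef WORDS_BIGENDIAN
    # define _Py_OPCODE(word) ((word) >> 8)
    # define _Py_OPARG(word) ((word) & 255)
    # define _Py_MAKECODEUNIT(opcode, oparg) (((opcode)<<8)|(oparg))
    #else
    # define _Py_OPCODE(word) ((word) & 255)
    # define _Py_OPARG(word) ((word) >> 8)
    # define _Py_MAKECODEUNIT(opcode, oparg) ((opcode)|((oparg)<<8))
    #endif

    int main(void) {
        /* Pack and unpack every (opcode, oparg) pair; both must survive. */
        for (int op = 0; op < 256; op++) {
            for (int arg = 0; arg < 256; arg++) {
                _Py_CODEUNIT w = _Py_MAKECODEUNIT(op, arg);
                assert(_Py_OPCODE(w) == op && _Py_OPARG(w) == arg);
            }
        }
        return 0;
    }

Whichever branch is compiled, _Py_MAKECODEUNIT writes the opcode into the byte that _Py_OPCODE reads, which is what lets the quickener rewrite instructions in place.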

Include/internal/pycore_code.h

@@ -4,6 +4,7 @@
 extern "C" {
 #endif

+/* Legacy Opcache */
 typedef struct {
     PyObject *ptr;  /* Cached pointer (borrowed reference) */

@@ -26,6 +27,129 @@ struct _PyOpcache {
 };
+/* PEP 659
+ * Specialization and quickening structs and helper functions
+ */
+
+typedef struct {
+    int32_t cache_count;
+    int32_t _;          /* Force 8 byte size */
+} _PyEntryZero;
+
+typedef struct {
+    uint8_t original_oparg;
+    uint8_t counter;
+    uint16_t index;
+} _PyAdaptiveEntry;
+
+/* Add specialized versions of entries to this union.
+ *
+ * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
+ * Preserving this invariant is necessary because:
+ *  - If any one form uses more space, then all must, and on 64 bit machines
+ *    this is likely to double the memory consumption of caches
+ *  - The function for calculating the offset of caches assumes a 4:1
+ *    cache:instruction size ratio. Changing that would need careful
+ *    analysis to choose a new function.
+ */
+typedef union {
+    _PyEntryZero zero;
+    _PyAdaptiveEntry adaptive;
+} SpecializedCacheEntry;
+
+#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
+
+/* Maximum size of code to quicken, in code units. */
+#define MAX_SIZE_TO_QUICKEN 5000
+
+typedef union _cache_or_instruction {
+    _Py_CODEUNIT code[1];
+    SpecializedCacheEntry entry;
+} SpecializedCacheOrInstruction;
+
+/* Get pointer to the nth cache entry, from the first instruction and n.
+ * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
+ * The zeroth entry immediately precedes the instructions.
+ */
+static inline SpecializedCacheEntry *
+_GetSpecializedCacheEntry(_Py_CODEUNIT *first_instr, Py_ssize_t n)
+{
+    SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
+    assert(&last_cache_plus_one->code[0] == first_instr);
+    return &last_cache_plus_one[-1-n].entry;
+}
+
+/* The following two functions form a pair.
+ *
+ * oparg_from_offset_and_nexti() is used to compute the oparg
+ * when quickening, so that offset_from_oparg_and_nexti()
+ * can be used at runtime to compute the offset.
+ *
+ * The relationship between the three values is currently
+ *     offset == (nexti>>1) + oparg
+ * This relation is chosen based on the following observations:
+ * 1. Typically, 1 in 4 instructions needs a cache.
+ * 2. Instructions that need a cache typically use 2 entries.
+ * These observations imply: offset ≈ index/2.
+ * We use the oparg to fine-tune the relation, to avoid wasting space
+ * and to allow consecutive instructions to use caches.
+ *
+ * If the number of cache entries < number of instructions/2, we will waste
+ * some small amount of space.
+ * If the number of cache entries > (number of instructions/2) + 255, then
+ * some instructions will not be able to use a cache.
+ * In practice, we expect some small amount of wasted space in shorter functions,
+ * and only functions of 1000 lines or more not to have enough cache space.
+ */
+static inline int
+oparg_from_offset_and_nexti(int offset, int nexti)
+{
+    return offset - (nexti>>1);
+}
+
+static inline int
+offset_from_oparg_and_nexti(int oparg, int nexti)
+{
+    return (nexti>>1) + oparg;
+}
+
+/* Get pointer to the cache entry associated with an instruction.
+ * nexti is the index of the instruction plus one.
+ * nexti is used as it corresponds to the instruction pointer in the interpreter.
+ * This doesn't check that an entry has been allocated for that instruction. */
+static inline SpecializedCacheEntry *
+_GetSpecializedCacheEntryForInstruction(_Py_CODEUNIT *first_instr, int nexti, int oparg)
+{
+    return _GetSpecializedCacheEntry(
+        first_instr,
+        offset_from_oparg_and_nexti(oparg, nexti)
+    );
+}
+
+#define QUICKENING_WARMUP_DELAY 8
+
+/* We want to compare to zero for efficiency, so we offset values accordingly */
+#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)
+#define QUICKENING_WARMUP_COLDEST 1
+
+static inline void
+PyCodeObject_IncrementWarmup(PyCodeObject * co)
+{
+    co->co_warmup++;
+}
+
+/* Used by the interpreter to determine when a code object should be quickened */
+static inline int
+PyCodeObject_IsWarmedUp(PyCodeObject * co)
+{
+    return (co->co_warmup == 0);
+}
+
+int _Py_Quicken(PyCodeObject *code);
+
+extern Py_ssize_t _Py_QuickenedCount;

 struct _PyCodeConstructor {
     /* metadata */
     PyObject *filename;
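To make the cache arithmetic above concrete: for the instruction at index 6 (so nexti == 7) with the next free cache slot at offset 5, quickening stores oparg = 5 - (7>>1) = 2, and the interpreter later recovers offset = (7>>1) + 2 = 5. A standalone sketch of that round trip, mirroring the two helpers above (illustrative only, not part of the commit):

    #include <assert.h>

    /* Local mirrors of the header's helpers, for a self-contained check. */
    static inline int oparg_from_offset_and_nexti(int offset, int nexti) {
        return offset - (nexti >> 1);
    }
    static inline int offset_from_oparg_and_nexti(int oparg, int nexti) {
        return (nexti >> 1) + oparg;
    }

    int main(void) {
        /* Quickening time: instruction index 6 => nexti 7; free slot at 5. */
        int nexti = 7, offset = 5;
        int oparg = oparg_from_offset_and_nexti(offset, nexti);
        assert(oparg == 2);                       /* 5 - (7>>1) == 2 */
        /* Run time: the interpreter recomputes the slot from oparg and nexti. */
        assert(offset_from_oparg_and_nexti(oparg, nexti) == offset);
        return 0;
    }

Because oparg is a byte, an instruction can only reach cache offsets in [nexti>>1, (nexti>>1) + 255], which is the source of the "not enough cache space" limit discussed in the comment.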

Lib/test/libregrtest/refleak.py

@@ -73,9 +73,10 @@ def dash_R(ns, test_name, test_func):
     alloc_deltas = [0] * repcount
     fd_deltas = [0] * repcount
     getallocatedblocks = sys.getallocatedblocks
     gettotalrefcount = sys.gettotalrefcount
+    _getquickenedcount = sys._getquickenedcount
     fd_count = os_helper.fd_count
     # initialize variables to make pyflakes quiet
     rc_before = alloc_before = fd_before = 0
@@ -92,7 +93,7 @@ def dash_R(ns, test_name, test_func):
         # dash_R_cleanup() ends with collecting cyclic trash:
         # read memory statistics immediately after.
-        alloc_after = getallocatedblocks()
+        alloc_after = getallocatedblocks() - _getquickenedcount()
         rc_after = gettotalrefcount()
         fd_after = fd_count()

Makefile.pre.in

@@ -378,6 +378,7 @@ PYTHON_OBJS= \
 		Python/pythonrun.o \
 		Python/pytime.o \
 		Python/bootstrap_hash.o \
+		Python/specialize.o \
 		Python/structmember.o \
 		Python/symtable.o \
 		Python/sysmodule.o \

Misc/NEWS.d/next/Core and Builtins (new NEWS entry)

@@ -0,0 +1,3 @@
+Implement quickening in the interpreter. This offers no advantages as
+yet, but is an enabler of future optimizations. See PEP 659 for full
+explanation.

Objects/codeobject.c

@@ -211,6 +211,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
     Py_INCREF(con->code);
     co->co_code = con->code;
+    co->co_firstinstr = (_Py_CODEUNIT *)PyBytes_AS_STRING(con->code);
     co->co_firstlineno = con->firstlineno;
     Py_INCREF(con->linetable);
     co->co_linetable = con->linetable;
@@ -250,6 +251,8 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
     co->co_opcache = NULL;
     co->co_opcache_flag = 0;
     co->co_opcache_size = 0;
+    co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
+    co->co_quickened = NULL;
 }

 /* The caller is responsible for ensuring that the given data is valid. */
@@ -376,7 +379,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
     if (_PyCode_Validate(&con) < 0) {
         return NULL;
     }
+    assert(PyBytes_GET_SIZE(code) % sizeof(_Py_CODEUNIT) == 0);
+    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(code), sizeof(_Py_CODEUNIT)));
     if (nlocals != PyTuple_GET_SIZE(varnames)) {
         PyErr_SetString(PyExc_ValueError,
                         "code: co_nlocals != len(co_varnames)");
@@ -1039,6 +1043,10 @@ code_dealloc(PyCodeObject *co)
     PyMem_Free(co->co_cell2arg);
     if (co->co_weakreflist != NULL)
         PyObject_ClearWeakRefs((PyObject*)co);
+    if (co->co_quickened) {
+        PyMem_Free(co->co_quickened);
+        _Py_QuickenedCount--;
+    }
     PyObject_Free(co);
 }

PCbuild/pythoncore.vcxproj

@@ -487,6 +487,7 @@
     <ClCompile Include="..\Python\dtoa.c" />
     <ClCompile Include="..\Python\Python-ast.c" />
     <ClCompile Include="..\Python\pythonrun.c" />
+    <ClCompile Include="..\Python\specialize.c" />
     <ClCompile Include="..\Python\suggestions.c" />
     <ClCompile Include="..\Python\structmember.c" />
     <ClCompile Include="..\Python\symtable.c" />

PCbuild/pythoncore.vcxproj.filters

@@ -1103,6 +1103,9 @@
     <ClCompile Include="..\Python\pythonrun.c">
       <Filter>Python</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\specialize.c">
+      <Filter>Python</Filter>
+    </ClCompile>
     <ClCompile Include="..\Python\structmember.c">
       <Filter>Python</Filter>
     </ClCompile>

Python/ceval.c

@@ -1343,6 +1343,14 @@ eval_frame_handle_pending(PyThreadState *tstate)
 #define JUMPTO(x)       (next_instr = first_instr + (x))
 #define JUMPBY(x)       (next_instr += (x))

+/* Get opcode and oparg from original instructions, not quickened form. */
+#define TRACING_NEXTOPARG() do { \
+        _Py_CODEUNIT word = ((_Py_CODEUNIT *)PyBytes_AS_STRING(co->co_code))[INSTR_OFFSET()]; \
+        opcode = _Py_OPCODE(word); \
+        oparg = _Py_OPARG(word); \
+        next_instr++; \
+    } while (0)
+
 /* OpCode prediction macros
     Some opcodes tend to come in pairs thus making it possible to
     predict the second code when the first is run. For example,
@@ -1644,15 +1652,23 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
     if (PyDTrace_FUNCTION_ENTRY_ENABLED())
         dtrace_function_entry(f);

+    /* Increment the warmup counter and quicken if warm enough
+     * _Py_Quicken is idempotent so we don't worry about overflow */
+    if (!PyCodeObject_IsWarmedUp(co)) {
+        PyCodeObject_IncrementWarmup(co);
+        if (PyCodeObject_IsWarmedUp(co)) {
+            if (_Py_Quicken(co)) {
+                goto exit_eval_frame;
+            }
+        }
+    }
+
     names = co->co_names;
     consts = co->co_consts;
     fastlocals = f->f_localsptr;
+    first_instr = co->co_firstinstr;
     freevars = f->f_localsptr + co->co_nlocals;
-    assert(PyBytes_Check(co->co_code));
-    assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX);
-    assert(PyBytes_GET_SIZE(co->co_code) % sizeof(_Py_CODEUNIT) == 0);
-    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(co->co_code), sizeof(_Py_CODEUNIT)));
-    first_instr = (_Py_CODEUNIT *) PyBytes_AS_STRING(co->co_code);
     /*
        f->f_lasti refers to the index of the last instruction,
        unless it's -1 in which case next_instr should be first_instr.
@@ -1757,7 +1773,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
         tracing_dispatch:
             f->f_lasti = INSTR_OFFSET();
-            NEXTOPARG();
+            TRACING_NEXTOPARG();
             if (PyDTrace_LINE_ENABLED())
                 maybe_dtrace_line(f, &trace_info);
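The warm-up gate above means a code object runs QUICKENING_WARMUP_DELAY times before _Py_Quicken fires: co_warmup counts up from -8, so the hot-path test stays a single compare with zero. A toy simulation of that counter (ToyCode and enter_frame are hypothetical stand-ins for illustration, not CPython API):

    #include <assert.h>
    #include <stdio.h>

    #define QUICKENING_WARMUP_DELAY 8
    #define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)

    /* Toy stand-in for a code object's warmup state. */
    typedef struct { int co_warmup; int quickened; } ToyCode;

    static void enter_frame(ToyCode *co) {
        /* Mirrors the gate added to _PyEval_EvalFrameDefault. */
        if (co->co_warmup != 0) {
            co->co_warmup++;
            if (co->co_warmup == 0) {
                co->quickened = 1;   /* would call _Py_Quicken(co) */
            }
        }
    }

    int main(void) {
        ToyCode co = { QUICKENING_INITIAL_WARMUP_VALUE, 0 };
        for (int call = 1; call <= 10; call++) {
            enter_frame(&co);
            if (co.quickened) {
                printf("quickened on call %d\n", call);  /* call 8 */
                break;
            }
        }
        assert(co.quickened && co.co_warmup == 0);
        return 0;
    }

Note also why tracing uses TRACING_NEXTOPARG: once quickened, first_instr points at the rewritten copy, so tracing reads the pristine instructions from co->co_code to keep observable opcodes stable.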

Python/clinic/sysmodule.c.h

@@ -710,6 +710,33 @@ exit:
 #endif /* defined(Py_REF_DEBUG) */

+PyDoc_STRVAR(sys__getquickenedcount__doc__,
+"_getquickenedcount($module, /)\n"
+"--\n"
+"\n");
+
+#define SYS__GETQUICKENEDCOUNT_METHODDEF    \
+    {"_getquickenedcount", (PyCFunction)sys__getquickenedcount, METH_NOARGS, sys__getquickenedcount__doc__},
+
+static Py_ssize_t
+sys__getquickenedcount_impl(PyObject *module);
+
+static PyObject *
+sys__getquickenedcount(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t _return_value;
+
+    _return_value = sys__getquickenedcount_impl(module);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyLong_FromSsize_t(_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(sys_getallocatedblocks__doc__,
 "getallocatedblocks($module, /)\n"
 "--\n"
@@ -983,4 +1010,4 @@ sys__deactivate_opcache(PyObject *module, PyObject *Py_UNUSED(ignored))
 #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF
 #define SYS_GETANDROIDAPILEVEL_METHODDEF
 #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */
-/*[clinic end generated code: output=68c62b9ca317a0c8 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=e77bf636a177c5c3 input=a9049054013a1b77]*/

Python/specialize.c (new file, 197 lines)

@@ -0,0 +1,197 @@
+#include "Python.h"
+#include "pycore_code.h"
+#include "opcode.h"
+
+/* We lay out the quickened data as a bi-directional array:
+ * Instructions upwards, cache entries downwards.
+ * first_instr is aligned to a SpecializedCacheEntry.
+ * The nth instruction is located at first_instr[n]
+ * The nth cache is located at ((SpecializedCacheEntry *)first_instr)[-1-n]
+ * The first (index 0) cache entry is reserved for the count, to enable finding
+ * the first instruction from the base pointer.
+ * The cache_count argument must include space for the count.
+ * We use the SpecializedCacheOrInstruction union to refer to the data
+ * to avoid type punning.
+
+ Layout of quickened data, each line 8 bytes for M cache entries and N instructions:
+
+ <cache_count>                             <---- co->co_quickened
+ <cache M-1>
+ <cache M-2>
+ ...
+ <cache 0>
+ <instr 0> <instr 1> <instr 2> <instr 3>   <---- co->co_firstinstr
+ <instr 4> <instr 5> <instr 6> <instr 7>
+ ...
+ <instr N-1>
+*/
+
+Py_ssize_t _Py_QuickenedCount = 0;
+
+static SpecializedCacheOrInstruction *
+allocate(int cache_count, int instruction_count)
+{
+    assert(sizeof(SpecializedCacheOrInstruction) == 2*sizeof(int32_t));
+    assert(sizeof(SpecializedCacheEntry) == 2*sizeof(int32_t));
+    assert(cache_count > 0);
+    assert(instruction_count > 0);
+    int count = cache_count +
+        (instruction_count + INSTRUCTIONS_PER_ENTRY - 1)/INSTRUCTIONS_PER_ENTRY;
+    SpecializedCacheOrInstruction *array = (SpecializedCacheOrInstruction *)
+        PyMem_Malloc(sizeof(SpecializedCacheOrInstruction) * count);
+    if (array == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    _Py_QuickenedCount++;
+    array[0].entry.zero.cache_count = cache_count;
+    return array;
+}
+
+static int
+get_cache_count(SpecializedCacheOrInstruction *quickened) {
+    return quickened[0].entry.zero.cache_count;
+}
+
+/* Map from opcode to adaptive opcode.
+   Values of zero are ignored. */
+static uint8_t adaptive_opcodes[256] = { 0 };
+
+/* The number of cache entries required for a "family" of instructions. */
+static uint8_t cache_requirements[256] = { 0 };
+
+/* Return the oparg for the cache_offset and instruction index.
+ *
+ * If no cache is needed then return the original oparg.
+ * If a cache is needed, but cannot be accessed because
+ * oparg would be too large, then return -1.
+ *
+ * Also updates the cache_offset, as it may need to be incremented by
+ * more than the cache requirements, if many instructions do not need caches.
+ *
+ * See pycore_code.h for details of how the cache offset,
+ * instruction index and oparg are related. */
+static int
+oparg_from_instruction_and_update_offset(int index, int opcode, int original_oparg, int *cache_offset) {
+    /* The instruction pointer in the interpreter points to the next
+     * instruction, so we compute the offset using nexti (index + 1) */
+    int nexti = index + 1;
+    uint8_t need = cache_requirements[opcode];
+    if (need == 0) {
+        return original_oparg;
+    }
+    assert(adaptive_opcodes[opcode] != 0);
+    int oparg = oparg_from_offset_and_nexti(*cache_offset, nexti);
+    assert(*cache_offset == offset_from_oparg_and_nexti(oparg, nexti));
+    /* Some cache space is wasted here as the minimum possible offset is (nexti>>1) */
+    if (oparg < 0) {
+        oparg = 0;
+        *cache_offset = offset_from_oparg_and_nexti(oparg, nexti);
+    }
+    else if (oparg > 255) {
+        return -1;
+    }
+    *cache_offset += need;
+    return oparg;
+}
+
+static int
+entries_needed(_Py_CODEUNIT *code, int len)
+{
+    int cache_offset = 0;
+    int previous_opcode = -1;
+    for (int i = 0; i < len; i++) {
+        uint8_t opcode = _Py_OPCODE(code[i]);
+        if (previous_opcode != EXTENDED_ARG) {
+            oparg_from_instruction_and_update_offset(i, opcode, 0, &cache_offset);
+        }
+        previous_opcode = opcode;
+    }
+    return cache_offset + 1;   // One extra for the count entry
+}
+
+static inline _Py_CODEUNIT *
+first_instruction(SpecializedCacheOrInstruction *quickened)
+{
+    return &quickened[get_cache_count(quickened)].code[0];
+}
+
+/** Insert adaptive instructions and superinstructions.
+ *
+ * Skip instructions preceded by EXTENDED_ARG when inserting adaptive
+ * instructions, as those are both very rare and tricky to handle.
+ */
+static void
+optimize(SpecializedCacheOrInstruction *quickened, int len)
+{
+    _Py_CODEUNIT *instructions = first_instruction(quickened);
+    int cache_offset = 0;
+    int previous_opcode = -1;
+    for (int i = 0; i < len; i++) {
+        int opcode = _Py_OPCODE(instructions[i]);
+        int oparg = _Py_OPARG(instructions[i]);
+        uint8_t adaptive_opcode = adaptive_opcodes[opcode];
+        if (adaptive_opcode && previous_opcode != EXTENDED_ARG) {
+            int new_oparg = oparg_from_instruction_and_update_offset(
+                i, opcode, oparg, &cache_offset
+            );
+            if (new_oparg < 0) {
+                /* Not possible to allocate a cache for this instruction */
+                previous_opcode = opcode;
+                continue;
+            }
+            instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, new_oparg);
+            previous_opcode = adaptive_opcode;
+            int entries_needed = cache_requirements[opcode];
+            if (entries_needed) {
+                /* Initialize the adaptive cache entry */
+                int cache0_offset = cache_offset - entries_needed;
+                SpecializedCacheEntry *cache =
+                    _GetSpecializedCacheEntry(instructions, cache0_offset);
+                cache->adaptive.original_oparg = oparg;
+                cache->adaptive.counter = 0;
+            }
+        }
+        else {
+            /* Super instructions don't use the cache,
+             * so no need to update the offset. */
+            switch (opcode) {
+                /* Insert superinstructions here.
+                 E.g.
+                case LOAD_FAST:
+                    if (previous_opcode == LOAD_FAST)
+                        instructions[i-1] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, oparg);
+                 */
+            }
+            previous_opcode = opcode;
+        }
+    }
+    assert(cache_offset + 1 == get_cache_count(quickened));
+}
+
+int
+_Py_Quicken(PyCodeObject *code) {
+    if (code->co_quickened) {
+        return 0;
+    }
+    Py_ssize_t size = PyBytes_GET_SIZE(code->co_code);
+    int instr_count = (int)(size/sizeof(_Py_CODEUNIT));
+    if (instr_count > MAX_SIZE_TO_QUICKEN) {
+        code->co_warmup = QUICKENING_WARMUP_COLDEST;
+        return 0;
+    }
+    int entry_count = entries_needed(code->co_firstinstr, instr_count);
+    SpecializedCacheOrInstruction *quickened = allocate(entry_count, instr_count);
+    if (quickened == NULL) {
+        return -1;
+    }
+    _Py_CODEUNIT *new_instructions = first_instruction(quickened);
+    memcpy(new_instructions, code->co_firstinstr, size);
+    optimize(quickened, instr_count);
+    code->co_quickened = quickened;
+    code->co_firstinstr = new_instructions;
+    return 0;
+}
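As a worked example of the layout: a function with 10 code units needing 2 real cache entries plus the reserved count entry (cache_count == 3) allocates 3 + ceil(10/4) == 6 eight-byte units, and the instructions begin at quickened[3]. A standalone sketch with mirrored definitions (Unit is a simplified stand-in for SpecializedCacheOrInstruction; illustrative, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t _Py_CODEUNIT;
    /* 8-byte unit holding either one cache entry or four code units. */
    typedef union { int64_t entry; _Py_CODEUNIT code[4]; } Unit;
    #define INSTRUCTIONS_PER_ENTRY (sizeof(Unit)/sizeof(_Py_CODEUNIT))

    int main(void) {
        int cache_count = 3;        /* 2 real entries + 1 reserved count entry */
        int instruction_count = 10; /* rounds up to 3 units of 4 code units */
        int count = cache_count +
            (int)((instruction_count + INSTRUCTIONS_PER_ENTRY - 1) / INSTRUCTIONS_PER_ENTRY);
        assert(count == 6);         /* matches allocate()'s size computation */

        Unit quickened[6];
        /* first_instruction(): instructions begin right after the caches... */
        _Py_CODEUNIT *first_instr = &quickened[cache_count].code[0];
        /* ...and cache entry n sits at ((Unit *)first_instr)[-1-n], so entry 0
         * immediately precedes the instructions, as in _GetSpecializedCacheEntry. */
        assert((Unit *)first_instr - 1 == &quickened[cache_count - 1]);
        return 0;
    }

Keeping the count in entry zero is what lets code_dealloc free co_quickened directly: the base pointer alone is enough to recover the whole block.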

Python/sysmodule.c

@@ -18,6 +18,7 @@ Data members:
 #include "pycore_ceval.h"         // _Py_RecursionLimitLowerWaterMark()
 #include "pycore_initconfig.h"    // _PyStatus_EXCEPTION()
 #include "pycore_object.h"        // _PyObject_IS_GC()
+#include "pycore_code.h"          // _Py_QuickenedCount
 #include "pycore_pathconfig.h"    // _PyPathConfig_ComputeSysPath0()
 #include "pycore_pyerrors.h"      // _PyErr_Fetch()
 #include "pycore_pylifecycle.h"   // _PyErr_WriteUnraisableDefaultHook()
@@ -1763,8 +1764,20 @@ sys_gettotalrefcount_impl(PyObject *module)
 {
     return _Py_GetRefTotal();
 }
 #endif /* Py_REF_DEBUG */

+/*[clinic input]
+sys._getquickenedcount -> Py_ssize_t
+[clinic start generated code]*/
+
+static Py_ssize_t
+sys__getquickenedcount_impl(PyObject *module)
+/*[clinic end generated code: output=1ab259e7f91248a2 input=249d448159eca912]*/
+{
+    return _Py_QuickenedCount;
+}
+
 /*[clinic input]
 sys.getallocatedblocks -> Py_ssize_t
@@ -1995,6 +2008,7 @@ static PyMethodDef sys_methods[] = {
 #endif
     SYS_GETFILESYSTEMENCODING_METHODDEF
     SYS_GETFILESYSTEMENCODEERRORS_METHODDEF
+    SYS__GETQUICKENEDCOUNT_METHODDEF
 #ifdef Py_TRACE_REFS
     {"getobjects", _Py_GetObjects, METH_VARARGS},
 #endif