Mirror of https://github.com/python/cpython.git (synced 2025-08-04 08:59:19 +00:00)
bpo-46841: Quicken code in-place (GH-31888)
* Moves the bytecode to the end of the corresponding PyCodeObject, and quickens it in-place.
* Removes the almost-always-unused co_varnames, co_freevars, and co_cellvars member caches.
* _PyOpcode_Deopt is a new mapping from all opcodes to their un-quickened forms.
* _PyOpcode_InlineCacheEntries is renamed to _PyOpcode_Caches.
* _Py_IncrementCountAndMaybeQuicken is renamed to _PyCode_Warmup.
* _Py_Quicken is renamed to _PyCode_Quicken.
* _co_quickened is renamed to _co_code_adaptive (and is now a read-only memoryview).
* The compiler no longer emits unused nonzero opargs.
parent 08eb754d84
commit 2bde6827ea

18 changed files with 832 additions and 688 deletions
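For orientation, here is a minimal sketch of the layout idea behind the first bullet, using invented names (SketchCode, sketch_deopt, SKETCH_CODE) rather than CPython's real definitions: the instruction stream sits in a flexible array member at the tail of the code object, so quickening can overwrite opcodes in place, and a _PyOpcode_Deopt-style table maps each (possibly specialized) opcode back to its base form.

#include <stddef.h>
#include <stdint.h>

typedef uint16_t codeunit;          /* one instruction: opcode + oparg */

/* Invented stand-in for PyCodeObject: the bytecode is a flexible array
 * member at the end of the object, so it can be rewritten in place. */
typedef struct {
    ptrdiff_t n_units;              /* number of code units (cf. Py_SIZE) */
    char code_adaptive[];           /* instructions, quickened in place */
} SketchCode;

/* Cf. _PyCode_CODE(co): instructions start right after the header. */
#define SKETCH_CODE(co) ((codeunit *)(co)->code_adaptive)

/* Cf. _PyOpcode_Deopt: maps every opcode, specialized or not, back to its
 * un-quickened form (zero here; the real table is generated). */
static const uint8_t sketch_deopt[256];

/* Recover un-quickened bytecode, roughly what co_code must now do on
 * demand. The opcode/oparg packing within a code unit is simplified;
 * CPython's packing depends on the platform's byte order. */
static void
sketch_deoptimize(const SketchCode *co, codeunit *out)
{
    const codeunit *in = SKETCH_CODE(co);
    for (ptrdiff_t i = 0; i < co->n_units; i++) {
        uint8_t opcode = (uint8_t)(in[i] & 0xff);
        uint8_t oparg  = (uint8_t)(in[i] >> 8);
        out[i] = (codeunit)(sketch_deopt[opcode] | (codeunit)(oparg << 8));
    }
}

A table like sketch_deopt is what lets co_code rebuild the original bytes on demand instead of keeping a second, un-quickened copy alive.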
Python/specialize.c
@@ -15,31 +15,6 @@
  * ./adaptive.md
  */
 
-/* We layout the quickened data as a bi-directional array:
- * Instructions upwards, cache entries downwards.
- * first_instr is aligned to a SpecializedCacheEntry.
- * The nth instruction is located at first_instr[n]
- * The nth cache is located at ((SpecializedCacheEntry *)first_instr)[-1-n]
- * The first (index 0) cache entry is reserved for the count, to enable finding
- * the first instruction from the base pointer.
- * The cache_count argument must include space for the count.
- * We use the SpecializedCacheOrInstruction union to refer to the data
- * to avoid type punning.
-
- Layout of quickened data, each line 8 bytes for M cache entries and N instructions:
-
- <cache_count>                              <---- co->co_quickened
- <cache M-1>
- <cache M-2>
- ...
- <cache 0>
- <instr 0> <instr 1> <instr 2> <instr 3>    <--- co->co_first_instr
- <instr 4> <instr 5> <instr 6> <instr 7>
- ...
- <instr N-1>
- */
-
-
 /* Map from opcode to adaptive opcode.
    Values of zero are ignored. */
 static uint8_t adaptive_opcodes[256] = {
@@ -275,26 +250,14 @@ _Py_PrintSpecializationStats(int to_file)
 #define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
 #endif
 
-static _Py_CODEUNIT *
-allocate(int instruction_count)
+// Insert adaptive instructions and superinstructions. This cannot fail.
+void
+_PyCode_Quicken(PyCodeObject *code)
 {
-    assert(instruction_count > 0);
-    void *array = PyMem_Malloc(sizeof(_Py_CODEUNIT) * instruction_count);
-    if (array == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
     _Py_QuickenedCount++;
-    return (_Py_CODEUNIT *)array;
-}
-
-
-// Insert adaptive instructions and superinstructions.
-static void
-optimize(_Py_CODEUNIT *instructions, int len)
-{
     int previous_opcode = -1;
-    for(int i = 0; i < len; i++) {
+    _Py_CODEUNIT *instructions = _PyCode_CODE(code);
+    for (int i = 0; i < Py_SIZE(code); i++) {
         int opcode = _Py_OPCODE(instructions[i]);
         uint8_t adaptive_opcode = adaptive_opcodes[opcode];
         if (adaptive_opcode) {
@@ -302,10 +265,10 @@ optimize(_Py_CODEUNIT *instructions, int len)
             // Make sure the adaptive counter is zero:
             assert(instructions[i + 1] == 0);
             previous_opcode = -1;
-            i += _PyOpcode_InlineCacheEntries[opcode];
+            i += _PyOpcode_Caches[opcode];
         }
         else {
-            assert(!_PyOpcode_InlineCacheEntries[opcode]);
+            assert(!_PyOpcode_Caches[opcode]);
             switch (opcode) {
                 case JUMP_ABSOLUTE:
                     _Py_SET_OPCODE(instructions[i], JUMP_ABSOLUTE_QUICK);
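Note the `i += _PyOpcode_Caches[opcode]` step above: an instruction's inline cache entries occupy the code units immediately after it, so any pass over the bytecode has to hop over them rather than decode them as instructions. A minimal, self-contained sketch of that iteration pattern (invented names, simplified opcode/oparg packing):

#include <stdint.h>
#include <stdio.h>

/* Walk real instructions only, skipping each one's inline cache units.
 * caches[opcode] plays the role of _PyOpcode_Caches. */
static void
dump_instructions(const uint16_t *units, int n, const uint8_t caches[256])
{
    for (int i = 0; i < n; i++) {
        uint8_t opcode = (uint8_t)(units[i] & 0xff);
        uint8_t oparg  = (uint8_t)(units[i] >> 8);
        printf("%4d: opcode=%u oparg=%u\n", i, opcode, oparg);
        i += caches[opcode];   /* skip this instruction's cache units */
    }
}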
@@ -347,28 +310,6 @@ optimize(_Py_CODEUNIT *instructions, int len)
     }
 }
 
-int
-_Py_Quicken(PyCodeObject *code) {
-    if (code->co_quickened) {
-        return 0;
-    }
-    Py_ssize_t size = PyBytes_GET_SIZE(code->co_code);
-    int instr_count = (int)(size/sizeof(_Py_CODEUNIT));
-    if (instr_count > MAX_SIZE_TO_QUICKEN) {
-        code->co_warmup = QUICKENING_WARMUP_COLDEST;
-        return 0;
-    }
-    _Py_CODEUNIT *quickened = allocate(instr_count);
-    if (quickened == NULL) {
-        return -1;
-    }
-    memcpy(quickened, code->co_firstinstr, size);
-    optimize(quickened, instr_count);
-    code->co_quickened = quickened;
-    code->co_firstinstr = quickened;
-    return 0;
-}
-
 static inline int
 initial_counter_value(void) {
     /* Starting value for the counter.
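The removed `_Py_Quicken` above had to allocate a side copy and could therefore fail with -1; its replacement `_PyCode_Quicken` mutates the code object's own instructions and returns void. A hedged sketch of the warmup counter suggested by `co_warmup` and `QUICKENING_WARMUP_COLDEST` in that removed code (struct, names, and the exact values are invented; cf. the renamed `_PyCode_Warmup`): the counter ticks up toward zero on each call and quickening fires exactly once, when it reaches zero, while a positive sentinel can never reach zero and so parks code that should not be quickened.

/* Invented stand-in for the warmup bookkeeping on a code object. */
typedef struct {
    int warmup;    /* negative while warming up; 0 once quickened */
} SketchWarmCode;

#define SKETCH_WARMUP_INITIAL (-8)   /* invented delay before quickening */
#define SKETCH_WARMUP_COLDEST   1    /* positive: never reaches zero */

static void sketch_quicken(SketchWarmCode *co);   /* cf. _PyCode_Quicken */

/* Cf. _PyCode_Warmup: a cheap per-call check on the hot path. */
static inline void
sketch_warmup(SketchWarmCode *co)
{
    if (co->warmup != 0) {
        co->warmup++;
        if (co->warmup == 0) {
            sketch_quicken(co);
        }
    }
}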
@@ -705,8 +646,7 @@ specialize_dict_access(
 int
 _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 {
-    assert(_PyOpcode_InlineCacheEntries[LOAD_ATTR] ==
-           INLINE_CACHE_ENTRIES_LOAD_ATTR);
+    assert(_PyOpcode_Caches[LOAD_ATTR] == INLINE_CACHE_ENTRIES_LOAD_ATTR);
     _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
     if (PyModule_CheckExact(owner)) {
         int err = specialize_module_load_attr(owner, instr, name, LOAD_ATTR,
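This hunk and the ones below all open with the same `(instr + 1)` cast: the inline cache is simply a struct view over the code units that directly follow the instruction being specialized. A sketch of the pattern with invented names and a plausible, not exact, field layout:

#include <stdint.h>

typedef uint16_t codeunit;

/* Plausible shape of an attribute cache (not CPython's exact fields):
 * every field is one code unit, so the struct overlays the bytecode. */
typedef struct {
    codeunit counter;       /* adaptive/backoff counter */
    codeunit version[2];    /* e.g. a type-version tag split across units */
    codeunit index;         /* e.g. location of the cached attribute */
} SketchAttrCache;

static inline SketchAttrCache *
sketch_attr_cache(codeunit *instr)
{
    /* Cache entries begin at the very next code unit after `instr`. */
    return (SketchAttrCache *)(instr + 1);
}

The asserts being rewritten in these hunks check exactly this invariant: the per-opcode cache count in the table must match the size of the struct that gets overlaid.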
@@ -804,8 +744,7 @@ success:
 int
 _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 {
-    assert(_PyOpcode_InlineCacheEntries[STORE_ATTR] ==
-           INLINE_CACHE_ENTRIES_STORE_ATTR);
+    assert(_PyOpcode_Caches[STORE_ATTR] == INLINE_CACHE_ENTRIES_STORE_ATTR);
     _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
     PyTypeObject *type = Py_TYPE(owner);
     if (PyModule_CheckExact(owner)) {
@@ -965,8 +904,7 @@ typedef enum {
 int
 _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 {
-    assert(_PyOpcode_InlineCacheEntries[LOAD_METHOD] ==
-           INLINE_CACHE_ENTRIES_LOAD_METHOD);
+    assert(_PyOpcode_Caches[LOAD_METHOD] == INLINE_CACHE_ENTRIES_LOAD_METHOD);
     _PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1);
     PyTypeObject *owner_cls = Py_TYPE(owner);
 
@@ -1098,8 +1036,7 @@ _Py_Specialize_LoadGlobal(
     PyObject *globals, PyObject *builtins,
     _Py_CODEUNIT *instr, PyObject *name)
 {
-    assert(_PyOpcode_InlineCacheEntries[LOAD_GLOBAL] ==
-           INLINE_CACHE_ENTRIES_LOAD_GLOBAL);
+    assert(_PyOpcode_Caches[LOAD_GLOBAL] == INLINE_CACHE_ENTRIES_LOAD_GLOBAL);
     /* Use inline cache */
     _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1);
     assert(PyUnicode_CheckExact(name));
@@ -1235,7 +1172,7 @@ int
 _Py_Specialize_BinarySubscr(
     PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
 {
-    assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] ==
+    assert(_PyOpcode_Caches[BINARY_SUBSCR] ==
            INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
     _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
     PyTypeObject *container_type = Py_TYPE(container);
@@ -1663,8 +1600,7 @@ int
 _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
                        PyObject *kwnames, int oparg)
 {
-    assert(_PyOpcode_InlineCacheEntries[PRECALL] ==
-           INLINE_CACHE_ENTRIES_PRECALL);
+    assert(_PyOpcode_Caches[PRECALL] == INLINE_CACHE_ENTRIES_PRECALL);
     _PyPrecallCache *cache = (_PyPrecallCache *)(instr + 1);
     int fail;
     if (PyCFunction_CheckExact(callable)) {
@@ -1710,7 +1646,7 @@ int
 _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
                     PyObject *kwnames)
 {
-    assert(_PyOpcode_InlineCacheEntries[CALL] == INLINE_CACHE_ENTRIES_CALL);
+    assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL);
     _PyCallCache *cache = (_PyCallCache *)(instr + 1);
     int fail;
     if (PyFunction_Check(callable)) {
@@ -1808,8 +1744,7 @@ void
 _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                         int oparg)
 {
-    assert(_PyOpcode_InlineCacheEntries[BINARY_OP] ==
-           INLINE_CACHE_ENTRIES_BINARY_OP);
+    assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP);
     _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1);
     switch (oparg) {
         case NB_ADD:
@@ -1936,8 +1871,7 @@ void
 _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                          int oparg)
 {
-    assert(_PyOpcode_InlineCacheEntries[COMPARE_OP] ==
-           INLINE_CACHE_ENTRIES_COMPARE_OP);
+    assert(_PyOpcode_Caches[COMPARE_OP] == INLINE_CACHE_ENTRIES_COMPARE_OP);
     _PyCompareOpCache *cache = (_PyCompareOpCache *)(instr + 1);
     int next_opcode = _Py_OPCODE(instr[INLINE_CACHE_ENTRIES_COMPARE_OP + 1]);
     if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) {
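One detail worth noting in the hunk above: the specializer peeks at the instruction after the inline cache (`instr[INLINE_CACHE_ENTRIES_COMPARE_OP + 1]`) because the comparison is specialized together with the conditional jump that consumes its result. A sketch of that lookahead, with an invented cache size and the same simplified packing as the earlier sketches:

#include <stdbool.h>
#include <stdint.h>

enum { SKETCH_COMPARE_CACHE_UNITS = 1 };   /* invented cache size */

/* True if the code unit after the compare's cache is one of the two
 * conditional jumps the specialized compare knows how to pair with. */
static bool
sketch_followed_by_cond_jump(const uint16_t *instr,
                             uint8_t pop_jump_if_false,
                             uint8_t pop_jump_if_true)
{
    uint8_t next = (uint8_t)(instr[SKETCH_COMPARE_CACHE_UNITS + 1] & 0xff);
    return next == pop_jump_if_false || next == pop_jump_if_true;
}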
@@ -2019,7 +1953,7 @@ unpack_sequence_fail_kind(PyObject *seq)
 void
 _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg)
 {
-    assert(_PyOpcode_InlineCacheEntries[UNPACK_SEQUENCE] ==
+    assert(_PyOpcode_Caches[UNPACK_SEQUENCE] ==
            INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE);
     _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1);
     if (PyTuple_CheckExact(seq)) {