gh-104584: Baby steps towards generating and executing traces (#105924)

Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose).

All of this code is likely to change dramatically before the 3.13 feature freeze. But this is a first step.
This commit is contained in:
Guido van Rossum 2023-06-26 19:02:57 -07:00 committed by GitHub
parent d3af83b934
commit 51fc725117
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 2559 additions and 305 deletions

View file

@ -52,8 +52,6 @@
#define family(name, ...) static int family_##name
#define pseudo(name) static int pseudo_##name
typedef PyObject *(*convertion_func_ptr)(PyObject *);
// Dummy variables for stack effects.
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
static PyObject *container, *start, *stop, *v, *lhs, *rhs, *res2;
@ -2182,7 +2180,7 @@ dummy_func(
frame = executor->execute(executor, frame, stack_pointer);
if (frame == NULL) {
frame = cframe.current_frame;
goto error;
goto resume_with_error;
}
goto resume_frame;
}

View file

@ -22,6 +22,7 @@
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_typeobject.h" // _PySuper_Lookup()
#include "pycore_uops.h" // _PyUOpExecutorObject
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_dict.h"
@ -223,14 +224,6 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
typedef PyObject *(*convertion_func_ptr)(PyObject *);
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
[FVC_STR] = PyObject_Str,
[FVC_REPR] = PyObject_Repr,
[FVC_ASCII] = PyObject_ASCII
};
#define UNBOUNDLOCAL_ERROR_MSG \
"cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \
@ -2771,3 +2764,131 @@ void Py_LeaveRecursiveCall(void)
{
_Py_LeaveRecursiveCall();
}
///////////////////// Experimental UOp Interpreter /////////////////////
// UPDATE_MISS_STATS (called by DEOPT_IF) uses next_instr
// TODO: Make it do something useful
#undef UPDATE_MISS_STATS
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
/* Run the uop trace held by `executor` (a _PyUOpExecutorObject) against
 * `frame`, starting with the evaluation stack at `stack_pointer`.
 *
 * The reference to the executor is released on every exit path, and the
 * current stack pointer is written back to the frame before returning.
 *
 * Returns:
 *   - `frame` when the trace reaches EXIT_TRACE, or when a uop jumps to one
 *     of the PREDICTED(...) labels below (the DEOPT_IF path);
 *   - NULL when a uop jumps to `error` (or one of the pop_N_error labels),
 *     or when _Py_HandlePending() reports a failure.  The caller recovers
 *     the frame from cframe.current_frame in that case.
 *
 * With LLTRACE defined, the first character of the PYTHONUOPSDEBUG
 * environment variable selects the debug verbosity written to stderr.
 */
_PyInterpreterFrame *
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
{
#ifdef LLTRACE
    char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
    int lltrace = 0;
    if (uop_debug != NULL && *uop_debug >= '0') {
        lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
    }
    if (lltrace >= 2) {
        PyCodeObject *code = _PyFrame_GetCode(frame);
        _Py_CODEUNIT *instr = frame->prev_instr + 1;
        fprintf(stderr,
                "Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
                PyUnicode_AsUTF8(code->co_qualname),
                PyUnicode_AsUTF8(code->co_filename),
                code->co_firstlineno,
                (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
    }
#endif

    PyThreadState *tstate = _PyThreadState_GET();
    _PyUOpExecutorObject *self = (_PyUOpExecutorObject *)executor;

    // These are declared and initialized before the first `goto error`
    // below: the LLTRACE code at the `error` label prints `opcode` and
    // `operand`, which would otherwise be read before any uop was fetched.
    int pc = 0;
    int opcode = -1;  // No uop fetched yet
    uint64_t operand = 0;
    int oparg = 0;

    // Equivalent to CHECK_EVAL_BREAKER()
    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
    if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) {
        if (_Py_HandlePending(tstate) != 0) {
            goto error;
        }
    }

    OBJECT_STAT_INC(optimization_traces_executed);
    // SET_IP stores `ip_offset + oparg` into frame->prev_instr, so the base
    // is one code unit before the start of the adaptive bytecode.
    _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;

    for (;;) {
        opcode = self->trace[pc].opcode;
        operand = self->trace[pc].operand;
        oparg = (int)operand;
#ifdef LLTRACE
        if (lltrace >= 3) {
            const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : "";
            int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
            fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n",
                    opname, opcode, operand, stack_level);
        }
#endif
        pc++;
        OBJECT_STAT_INC(optimization_uops_executed);
        switch (opcode) {

// The generated uop cases are compiled with specialization turned off.
#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0
#include "executor_cases.c.h"

            case SET_IP:
            {
                frame->prev_instr = ip_offset + oparg;
                break;
            }

            case EXIT_TRACE:
            {
                _PyFrame_SetStackPointer(frame, stack_pointer);
                Py_DECREF(self);
                return frame;
            }

            default:
            {
                fprintf(stderr, "Unknown uop %d, operand %" PRIu64 "\n", opcode, operand);
                Py_FatalError("Unknown uop");
                abort();  // Unreachable
                for (;;) {}
                // Really unreachable
            }

        }
    }

// Each pop_N_error label shrinks the stack by one entry and falls through
// to the next, so jumping to pop_N_error drops N entries before `error`.
pop_4_error:
    STACK_SHRINK(1);
pop_3_error:
    STACK_SHRINK(1);
pop_2_error:
    STACK_SHRINK(1);
pop_1_error:
    STACK_SHRINK(1);
error:
    // On ERROR_IF we return NULL as the frame.
    // The caller recovers the frame from cframe.current_frame.
#ifdef LLTRACE
    if (lltrace >= 2) {
        fprintf(stderr, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
    }
#endif
    _PyFrame_SetStackPointer(frame, stack_pointer);
    Py_DECREF(self);
    return NULL;

PREDICTED(UNPACK_SEQUENCE)
PREDICTED(COMPARE_OP)
PREDICTED(LOAD_SUPER_ATTR)
PREDICTED(STORE_SUBSCR)
PREDICTED(BINARY_SUBSCR)
PREDICTED(BINARY_OP)
    // On DEOPT_IF we just repeat the last instruction.
    // This presumes nothing was popped from the stack (nor pushed).
#ifdef LLTRACE
    if (lltrace >= 2) {
        fprintf(stderr, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
    }
#endif
    _PyFrame_SetStackPointer(frame, stack_pointer);
    Py_DECREF(self);
    return frame;
}

View file

@ -1,4 +1,4 @@
// Macros needed by ceval.c and bytecodes.c
// Macros and other things needed by ceval.c and bytecodes.c
/* Computed GOTOs, or
the-optimization-commonly-but-improperly-known-as-"threaded code"
@ -339,3 +339,11 @@ do { \
goto error; \
} \
} while (0);
/* Signature of a conversion function: takes an object and returns an object. */
typedef PyObject *(*convertion_func_ptr)(PyObject *);
/* Conversion functions indexed by the FVC_* constants.
 * The table has four slots but only three are initialized here; the
 * remaining slot is implicitly NULL (presumably the "no conversion" case --
 * verify against the FVC_* definitions). */
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
[FVC_STR] = PyObject_Str,
[FVC_REPR] = PyObject_Repr,
[FVC_ASCII] = PyObject_ASCII
};

1606
Python/executor_cases.c.h generated Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -18,6 +18,21 @@
((OP) == POP_BLOCK) || \
0
/* Identifiers for micro-ops (uops) that are not ordinary bytecode opcodes.
 * All values are >= 300, i.e. outside the 0-255 index range of the 256-entry
 * _PyOpcode_macro_expansion table declared in this header. */
#define EXIT_TRACE 300
#define SET_IP 301
#define _GUARD_BOTH_INT 302
#define _BINARY_OP_MULTIPLY_INT 303
#define _BINARY_OP_ADD_INT 304
#define _BINARY_OP_SUBTRACT_INT 305
#define _GUARD_BOTH_FLOAT 306
#define _BINARY_OP_MULTIPLY_FLOAT 307
#define _BINARY_OP_ADD_FLOAT 308
#define _BINARY_OP_SUBTRACT_FLOAT 309
#define _GUARD_BOTH_UNICODE 310
#define _BINARY_OP_ADD_UNICODE 311
#define _LOAD_LOCALS 312
#define _LOAD_FROM_DICT_OR_GLOBALS 313
#ifndef NEED_OPCODE_METADATA
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#else
@ -885,12 +900,19 @@ struct opcode_metadata {
int flags;
};
/* Describes how one bytecode instruction expands into a sequence of uops. */
struct opcode_macro_expansion {
/* Number of entries of `uops` that are in use; 0 means no expansion. */
int nuops;
/* Per-uop description (at most 8 per instruction):
 *   uop    - the uop number to emit;
 *   size   - selects how the uop's operand is obtained.  The trace
 *            translator in this change accepts 0 (keep the instruction's
 *            oparg), 1, 2 or 4 (read a 16-, 32- or 64-bit value from the
 *            instruction's cache) and treats anything else as fatal;
 *   offset - index, relative to the instruction, of the code unit whose
 *            cache is read when size is non-zero. */
struct { int16_t uop; int8_t size; int8_t offset; } uops[8];
};
#define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format)
#define SAME_OPCODE_METADATA(OP1, OP2) \
(OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))
#ifndef NEED_OPCODE_METADATA
extern const struct opcode_metadata _PyOpcode_opcode_metadata[512];
extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256];
#else
const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
[NOP] = { true, INSTR_FMT_IX, 0 },
@ -1101,4 +1123,88 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[512] = {
[CACHE] = { true, INSTR_FMT_IX, 0 },
[RESERVED] = { true, INSTR_FMT_IX, 0 },
};
/* Expansion of bytecode instructions into uops, indexed by opcode.
 * Every entry listed here expands to exactly one uop that carries the same
 * number as the opcode itself, with size 0 and offset 0.  Opcodes that are
 * not listed are zero-initialized, so their nuops is 0. */
const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
[NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } },
[LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
[LOAD_CONST] = { .nuops = 1, .uops = { { LOAD_CONST, 0, 0 } } },
[STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
[POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
[PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
[END_SEND] = { .nuops = 1, .uops = { { END_SEND, 0, 0 } } },
[UNARY_NEGATIVE] = { .nuops = 1, .uops = { { UNARY_NEGATIVE, 0, 0 } } },
[UNARY_NOT] = { .nuops = 1, .uops = { { UNARY_NOT, 0, 0 } } },
[UNARY_INVERT] = { .nuops = 1, .uops = { { UNARY_INVERT, 0, 0 } } },
[BINARY_SLICE] = { .nuops = 1, .uops = { { BINARY_SLICE, 0, 0 } } },
[STORE_SLICE] = { .nuops = 1, .uops = { { STORE_SLICE, 0, 0 } } },
[BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_LIST_INT, 0, 0 } } },
[BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_TUPLE_INT, 0, 0 } } },
[BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_DICT, 0, 0 } } },
[LIST_APPEND] = { .nuops = 1, .uops = { { LIST_APPEND, 0, 0 } } },
[SET_ADD] = { .nuops = 1, .uops = { { SET_ADD, 0, 0 } } },
[STORE_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { STORE_SUBSCR_LIST_INT, 0, 0 } } },
[STORE_SUBSCR_DICT] = { .nuops = 1, .uops = { { STORE_SUBSCR_DICT, 0, 0 } } },
[DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } },
[CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } },
[CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } },
[GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } },
[GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } },
[GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } },
[POP_EXCEPT] = { .nuops = 1, .uops = { { POP_EXCEPT, 0, 0 } } },
[LOAD_ASSERTION_ERROR] = { .nuops = 1, .uops = { { LOAD_ASSERTION_ERROR, 0, 0 } } },
[LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { LOAD_BUILD_CLASS, 0, 0 } } },
[STORE_NAME] = { .nuops = 1, .uops = { { STORE_NAME, 0, 0 } } },
[DELETE_NAME] = { .nuops = 1, .uops = { { DELETE_NAME, 0, 0 } } },
[UNPACK_SEQUENCE_TWO_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TWO_TUPLE, 0, 0 } } },
[UNPACK_SEQUENCE_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TUPLE, 0, 0 } } },
[UNPACK_SEQUENCE_LIST] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_LIST, 0, 0 } } },
[UNPACK_EX] = { .nuops = 1, .uops = { { UNPACK_EX, 0, 0 } } },
[DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } },
[STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } },
[DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } },
[DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } },
[LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } },
[LOAD_DEREF] = { .nuops = 1, .uops = { { LOAD_DEREF, 0, 0 } } },
[STORE_DEREF] = { .nuops = 1, .uops = { { STORE_DEREF, 0, 0 } } },
[COPY_FREE_VARS] = { .nuops = 1, .uops = { { COPY_FREE_VARS, 0, 0 } } },
[BUILD_STRING] = { .nuops = 1, .uops = { { BUILD_STRING, 0, 0 } } },
[BUILD_TUPLE] = { .nuops = 1, .uops = { { BUILD_TUPLE, 0, 0 } } },
[BUILD_LIST] = { .nuops = 1, .uops = { { BUILD_LIST, 0, 0 } } },
[LIST_EXTEND] = { .nuops = 1, .uops = { { LIST_EXTEND, 0, 0 } } },
[SET_UPDATE] = { .nuops = 1, .uops = { { SET_UPDATE, 0, 0 } } },
[BUILD_SET] = { .nuops = 1, .uops = { { BUILD_SET, 0, 0 } } },
[BUILD_MAP] = { .nuops = 1, .uops = { { BUILD_MAP, 0, 0 } } },
[SETUP_ANNOTATIONS] = { .nuops = 1, .uops = { { SETUP_ANNOTATIONS, 0, 0 } } },
[BUILD_CONST_KEY_MAP] = { .nuops = 1, .uops = { { BUILD_CONST_KEY_MAP, 0, 0 } } },
[DICT_UPDATE] = { .nuops = 1, .uops = { { DICT_UPDATE, 0, 0 } } },
[DICT_MERGE] = { .nuops = 1, .uops = { { DICT_MERGE, 0, 0 } } },
[MAP_ADD] = { .nuops = 1, .uops = { { MAP_ADD, 0, 0 } } },
[LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_ATTR, 0, 0 } } },
[LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_METHOD, 0, 0 } } },
[COMPARE_OP_FLOAT] = { .nuops = 1, .uops = { { COMPARE_OP_FLOAT, 0, 0 } } },
[COMPARE_OP_INT] = { .nuops = 1, .uops = { { COMPARE_OP_INT, 0, 0 } } },
[COMPARE_OP_STR] = { .nuops = 1, .uops = { { COMPARE_OP_STR, 0, 0 } } },
[IS_OP] = { .nuops = 1, .uops = { { IS_OP, 0, 0 } } },
[CONTAINS_OP] = { .nuops = 1, .uops = { { CONTAINS_OP, 0, 0 } } },
[CHECK_EG_MATCH] = { .nuops = 1, .uops = { { CHECK_EG_MATCH, 0, 0 } } },
[CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { CHECK_EXC_MATCH, 0, 0 } } },
[GET_LEN] = { .nuops = 1, .uops = { { GET_LEN, 0, 0 } } },
[MATCH_CLASS] = { .nuops = 1, .uops = { { MATCH_CLASS, 0, 0 } } },
[MATCH_MAPPING] = { .nuops = 1, .uops = { { MATCH_MAPPING, 0, 0 } } },
[MATCH_SEQUENCE] = { .nuops = 1, .uops = { { MATCH_SEQUENCE, 0, 0 } } },
[MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } },
[GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } },
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
[WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } },
[PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } },
[EXIT_INIT_CHECK] = { .nuops = 1, .uops = { { EXIT_INIT_CHECK, 0, 0 } } },
[MAKE_FUNCTION] = { .nuops = 1, .uops = { { MAKE_FUNCTION, 0, 0 } } },
[SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { SET_FUNCTION_ATTRIBUTE, 0, 0 } } },
[BUILD_SLICE] = { .nuops = 1, .uops = { { BUILD_SLICE, 0, 0 } } },
[CONVERT_VALUE] = { .nuops = 1, .uops = { { CONVERT_VALUE, 0, 0 } } },
[FORMAT_SIMPLE] = { .nuops = 1, .uops = { { FORMAT_SIMPLE, 0, 0 } } },
[FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { FORMAT_WITH_SPEC, 0, 0 } } },
[COPY] = { .nuops = 1, .uops = { { COPY, 0, 0 } } },
[SWAP] = { .nuops = 1, .uops = { { SWAP, 0, 0 } } },
};
#endif

View file

@ -3,7 +3,9 @@
#include "opcode.h"
#include "pycore_interp.h"
#include "pycore_opcode.h"
#include "opcode_metadata.h"
#include "pycore_pystate.h"
#include "pycore_uops.h"
#include "cpython/optimizer.h"
#include <stdbool.h>
#include <stdint.h>
@ -278,3 +280,200 @@ PyUnstable_Optimizer_NewCounter(void)
opt->count = 0;
return (PyObject *)opt;
}
///////////////////// Experimental UOp Optimizer /////////////////////
#ifdef Py_DEBUG
/* For debugging the interpreter: */
# define LLTRACE 1 /* Low-level trace feature */
#endif
/* tp_dealloc slot for uop executors: hands the object's memory back to
 * the object allocator. */
static void
uop_dealloc(_PyUOpExecutorObject *self)
{
    void *memory = self;
    PyObject_Free(memory);
}
/* Type object for uop executors.  Instances are fixed-size
 * _PyUOpExecutorObject structs, cannot be instantiated through the type,
 * and are released by uop_dealloc(). */
static PyTypeObject UOpExecutor_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    .tp_name = "uop_executor",
    .tp_dealloc = (destructor)uop_dealloc,
    .tp_flags = Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_DEFAULT,
    .tp_itemsize = 0,
    .tp_basicsize = sizeof(_PyUOpExecutorObject),
};
/* Translate the bytecode of `code`, starting at `instr`, into uops.
 *
 * Uops are written to `trace`, which has room for `max_length` entries.
 * Each translated instruction is followed by a SET_IP uop whose operand is
 * the offset (in code units from the start of co_code_adaptive) of the next
 * instruction.  Translation stops at the first instruction that has no
 * expansion in _PyOpcode_macro_expansion, or when the remaining space could
 * not hold the next instruction's uops plus SET_IP and EXIT_TRACE.
 *
 * If at least one uop was produced, a final EXIT_TRACE uop is appended.
 *
 * Returns the number of uops written (including EXIT_TRACE), or 0 when
 * nothing could be translated.
 *
 * With LLTRACE defined, the first character of the PYTHONUOPSDEBUG
 * environment variable selects the debug verbosity written to stderr.
 */
static int
translate_bytecode_to_trace(
    PyCodeObject *code,
    _Py_CODEUNIT *instr,
    _PyUOpInstruction *trace,
    int max_length)
{
#ifdef LLTRACE
    char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
    int lltrace = 0;
    if (uop_debug != NULL && *uop_debug >= '0') {
        lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
    }
    if (lltrace >= 4) {
        fprintf(stderr,
                "Optimizing %s (%s:%d) at offset %ld\n",
                PyUnicode_AsUTF8(code->co_qualname),
                PyUnicode_AsUTF8(code->co_filename),
                code->co_firstlineno,
                (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
    }
// Append one uop to the trace.  Wrapped in do { } while (0) so that the
// macro behaves as a single statement wherever it is used.
#define ADD_TO_TRACE(OPCODE, OPERAND) \
    do { \
        if (lltrace >= 2) { \
            const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : ""; \
            fprintf(stderr, " ADD_TO_TRACE(%s %d, %" PRIu64 ")\n", opname, (OPCODE), (uint64_t)(OPERAND)); \
        } \
        trace[trace_length].opcode = (OPCODE); \
        trace[trace_length].operand = (OPERAND); \
        trace_length++; \
    } while (0)
#else
#define ADD_TO_TRACE(OPCODE, OPERAND) \
    do { \
        trace[trace_length].opcode = (OPCODE); \
        trace[trace_length].operand = (OPERAND); \
        trace_length++; \
    } while (0)
#endif

    int trace_length = 0;
    // Always reserve space for one uop, plus SET_IP, plus EXIT_TRACE
    while (trace_length + 3 <= max_length) {
        int opcode = instr->op.code;
        uint64_t operand = instr->op.arg;
        switch (opcode) {
            case LOAD_FAST_LOAD_FAST:
            {
                // Reserve space for two uops (+ SET_IP + EXIT_TRACE)
                if (trace_length + 4 > max_length) {
                    goto done;
                }
                // The oparg packs two local indices: high bits, low 4 bits.
                uint64_t oparg1 = operand >> 4;
                uint64_t oparg2 = operand & 15;
                ADD_TO_TRACE(LOAD_FAST, oparg1);
                ADD_TO_TRACE(LOAD_FAST, oparg2);
                break;
            }
            default:
            {
                const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
                if (expansion->nuops > 0) {
                    // Reserve space for nuops (+ SET_IP + EXIT_TRACE)
                    int nuops = expansion->nuops;
                    if (trace_length + nuops + 2 > max_length) {
                        goto done;
                    }
                    for (int i = 0; i < nuops; i++) {
                        int offset = expansion->uops[i].offset;
                        // A non-zero size replaces the operand with a value
                        // read from the instruction's cache at `offset`.
                        switch (expansion->uops[i].size) {
                            case 0:
                                break;
                            case 1:
                                operand = read_u16(&instr[offset].cache);
                                break;
                            case 2:
                                operand = read_u32(&instr[offset].cache);
                                break;
                            case 4:
                                operand = read_u64(&instr[offset].cache);
                                break;
                            default:
                                fprintf(stderr,
                                        "opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n",
                                        opcode, operand, nuops, i,
                                        expansion->uops[i].size,
                                        expansion->uops[i].offset);
                                Py_FatalError("garbled expansion");
                        }
                        ADD_TO_TRACE(expansion->uops[i].uop, operand);
                        assert(expansion->uops[0].size == 0);  // TODO
                    }
                    break;
                }
                // Unsupported opcode: end the trace here.
                goto done;  // Break out of while loop
            }
        }
        instr++;
        // Add cache size for opcode
        instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
        ADD_TO_TRACE(SET_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
    }

done:
    if (trace_length > 0) {
        ADD_TO_TRACE(EXIT_TRACE, 0);
#ifdef LLTRACE
        if (lltrace >= 1) {
            fprintf(stderr,
                    "Created a trace for %s (%s:%d) at offset %ld -- length %d\n",
                    PyUnicode_AsUTF8(code->co_qualname),
                    PyUnicode_AsUTF8(code->co_filename),
                    code->co_firstlineno,
                    (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive),
                    trace_length);
        }
#endif
    }
    else {
#ifdef LLTRACE
        if (lltrace >= 4) {
            fprintf(stderr,
                    "No trace for %s (%s:%d) at offset %ld\n",
                    PyUnicode_AsUTF8(code->co_qualname),
                    PyUnicode_AsUTF8(code->co_filename),
                    code->co_firstlineno,
                    (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
        }
#endif
    }
    return trace_length;

#undef ADD_TO_TRACE
}
/* `optimize` callback of the uop optimizer.
 *
 * Translates the bytecode at `instr` into a uop trace and, if any uops were
 * produced, wraps a copy of them in a new uop executor stored in *exec_ptr.
 *
 * Returns 1 when an executor was created, -1 when allocating it failed, and
 * otherwise the (non-positive) result of translate_bytecode_to_trace().
 */
static int
uop_optimize(
    _PyOptimizerObject *self,
    PyCodeObject *code,
    _Py_CODEUNIT *instr,
    _PyExecutorObject **exec_ptr)
{
    _PyUOpInstruction buffer[_Py_UOP_MAX_TRACE_LENGTH];
    int n_uops = translate_bytecode_to_trace(code, instr, buffer, _Py_UOP_MAX_TRACE_LENGTH);
    if (n_uops > 0) {
        OBJECT_STAT_INC(optimization_traces_created);
        _PyUOpExecutorObject *result = (_PyUOpExecutorObject *)_PyObject_New(&UOpExecutor_Type);
        if (result == NULL) {
            return -1;
        }
        result->base.execute = _PyUopExecute;
        // Only the uops actually produced are copied out of the scratch buffer.
        memcpy(result->trace, buffer, n_uops * sizeof(_PyUOpInstruction));
        *exec_ptr = (_PyExecutorObject *)result;
        return 1;
    }
    // Error or nothing translated
    return n_uops;
}
static PyTypeObject UOpOptimizer_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "uop_optimizer",
.tp_basicsize = sizeof(_PyOptimizerObject),
.tp_itemsize = 0,
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
};
/* Create a new uop optimizer object.
 *
 * The optimizer's callback is uop_optimize(); its resume threshold is set
 * to UINT16_MAX and its backedge threshold to 0.
 *
 * Returns the new object, or NULL if allocation failed.
 */
PyObject *
PyUnstable_Optimizer_NewUOpOptimizer(void)
{
    _PyOptimizerObject *optimizer =
        (_PyOptimizerObject *)_PyObject_New(&UOpOptimizer_Type);
    if (optimizer != NULL) {
        optimizer->optimize = uop_optimize;
        optimizer->backedge_threshold = 0;
        optimizer->resume_threshold = UINT16_MAX;
    }
    return (PyObject *)optimizer;
}

View file

@ -1181,6 +1181,19 @@ init_interp_main(PyThreadState *tstate)
#endif
}
// Turn on experimental tier 2 (uops-based) optimizer.
// Only the main interpreter is affected.  It is enabled when PYTHONUOPS is
// set to a value whose first character is greater than '0', or when the
// "uops" -X option is present.
if (is_main_interp) {
    char *envvar = Py_GETENV("PYTHONUOPS");
    int enabled = envvar != NULL && *envvar > '0';
    if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) {
        enabled = 1;
    }
    if (enabled) {
        PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
        if (opt == NULL) {
            // Creating the optimizer failed: report it instead of
            // installing a NULL optimizer with an exception pending.
            return _PyStatus_ERR("can't initialize optimizer");
        }
        PyUnstable_SetOptimizer((_PyOptimizerObject *)opt);
        // NOTE(review): the reference returned by
        // PyUnstable_Optimizer_NewUOpOptimizer() is not released here --
        // confirm whether PyUnstable_SetOptimizer() takes its own reference.
    }
}
assert(!_PyErr_Occurred(tstate));
return _PyStatus_OK();

View file

@ -195,6 +195,10 @@ print_object_stats(FILE *out, ObjectStats *stats)
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
}
static void