gh-104584: Baby steps towards generating and executing traces (#105924)

Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose).

All of this code is likely to change dramatically before the 3.13 feature freeze. But this is a first step.
This commit is contained in:
Guido van Rossum 2023-06-26 19:02:57 -07:00 committed by GitHub
parent d3af83b934
commit 51fc725117
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 2559 additions and 305 deletions

View file

@ -22,6 +22,7 @@
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_typeobject.h" // _PySuper_Lookup()
#include "pycore_uops.h" // _PyUOpExecutorObject
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
#include "pycore_dict.h"
@ -223,14 +224,6 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
typedef PyObject *(*convertion_func_ptr)(PyObject *);
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
[FVC_STR] = PyObject_Str,
[FVC_REPR] = PyObject_Repr,
[FVC_ASCII] = PyObject_ASCII
};
#define UNBOUNDLOCAL_ERROR_MSG \
"cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \
@ -2771,3 +2764,131 @@ void Py_LeaveRecursiveCall(void)
{
_Py_LeaveRecursiveCall();
}
///////////////////// Experimental UOp Interpreter /////////////////////
// UPDATE_MISS_STATS (called by DEOPT_IF) uses next_instr
// TODO: Make it do something useful
#undef UPDATE_MISS_STATS
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
_PyInterpreterFrame *
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
{
#ifdef LLTRACE
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
int lltrace = 0;
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
}
if (lltrace >= 2) {
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *instr = frame->prev_instr + 1;
fprintf(stderr,
"Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
}
#endif
PyThreadState *tstate = _PyThreadState_GET();
_PyUOpExecutorObject *self = (_PyUOpExecutorObject *)executor;
// Equivalent to CHECK_EVAL_BREAKER()
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) {
if (_Py_HandlePending(tstate) != 0) {
goto error;
}
}
OBJECT_STAT_INC(optimization_traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;
int pc = 0;
int opcode;
uint64_t operand;
int oparg;
for (;;) {
opcode = self->trace[pc].opcode;
operand = self->trace[pc].operand;
oparg = (int)operand;
#ifdef LLTRACE
if (lltrace >= 3) {
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : "";
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n",
opname, opcode, operand, stack_level);
}
#endif
pc++;
OBJECT_STAT_INC(optimization_uops_executed);
switch (opcode) {
#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0
#include "executor_cases.c.h"
case SET_IP:
{
frame->prev_instr = ip_offset + oparg;
break;
}
case EXIT_TRACE:
{
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return frame;
}
default:
{
fprintf(stderr, "Unknown uop %d, operand %" PRIu64 "\n", opcode, operand);
Py_FatalError("Unknown uop");
abort(); // Unreachable
for (;;) {}
// Really unreachable
}
}
}
pop_4_error:
STACK_SHRINK(1);
pop_3_error:
STACK_SHRINK(1);
pop_2_error:
STACK_SHRINK(1);
pop_1_error:
STACK_SHRINK(1);
error:
// On ERROR_IF we return NULL as the frame.
// The caller recovers the frame from cframe.current_frame.
#ifdef LLTRACE
if (lltrace >= 2) {
fprintf(stderr, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
}
#endif
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return NULL;
PREDICTED(UNPACK_SEQUENCE)
PREDICTED(COMPARE_OP)
PREDICTED(LOAD_SUPER_ATTR)
PREDICTED(STORE_SUBSCR)
PREDICTED(BINARY_SUBSCR)
PREDICTED(BINARY_OP)
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
#ifdef LLTRACE
if (lltrace >= 2) {
fprintf(stderr, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
}
#endif
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return frame;
}