GH-116422: Tier2 hot/cold splitting (GH-116813)

Splits the "cold" path (deopts and exits) from the "hot" path, reducing the size of most JIT-compiled instructions, at the cost of slower exits.
This commit is contained in:
Mark Shannon 2024-03-26 09:35:11 +00:00 committed by GitHub
parent 61599a48f5
commit bf82f77957
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1662 additions and 1003 deletions

View file

@ -179,7 +179,7 @@ dummy_func(
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
next_instr = this_instr;
}
@ -206,7 +206,13 @@ dummy_func(
inst(LOAD_FAST_CHECK, (-- value)) {
value = GETLOCAL(oparg);
ERROR_IF(value == NULL, unbound_local_error);
if (value == NULL) {
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
ERROR_IF(1, error);
}
Py_INCREF(value);
}
@ -275,7 +281,7 @@ dummy_func(
if (PyGen_Check(receiver)) {
PyErr_SetObject(PyExc_StopIteration, value);
if (monitor_stop_iteration(tstate, frame, this_instr)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
PyErr_SetRaisedException(NULL);
}
@ -290,7 +296,7 @@ dummy_func(
if (PyGen_Check(receiver) || PyCoro_CheckExact(receiver)) {
PyErr_SetObject(PyExc_StopIteration, value);
if (monitor_stop_iteration(tstate, frame, this_instr)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
PyErr_SetRaisedException(NULL);
}
@ -826,7 +832,7 @@ dummy_func(
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_RETURN,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) ERROR_NO_POP();
STACK_SHRINK(1);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -850,7 +856,7 @@ dummy_func(
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_RETURN,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) ERROR_NO_POP();
Py_INCREF(retval);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -906,7 +912,7 @@ dummy_func(
if (PyAsyncGen_CheckExact(aiter)) {
awaitable = type->tp_as_async->am_anext(aiter);
if (awaitable == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
} else {
if (type->tp_as_async != NULL){
@ -916,7 +922,7 @@ dummy_func(
if (getter != NULL) {
next_iter = (*getter)(aiter);
if (next_iter == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
else {
@ -924,7 +930,7 @@ dummy_func(
"'async for' requires an iterator with "
"__anext__ method, got %.100s",
type->tp_name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
awaitable = _PyCoro_GetAwaitableIter(next_iter);
@ -936,7 +942,7 @@ dummy_func(
Py_TYPE(next_iter)->tp_name);
Py_DECREF(next_iter);
GOTO_ERROR(error);
ERROR_NO_POP();
} else {
Py_DECREF(next_iter);
}
@ -1018,7 +1024,7 @@ dummy_func(
JUMPBY(oparg);
}
else {
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
Py_DECREF(v);
@ -1054,7 +1060,7 @@ dummy_func(
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_YIELD,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) ERROR_NO_POP();
tstate->exc_info = gen->gi_exc_state.previous_item;
gen->gi_exc_state.previous_item = NULL;
_Py_LeaveRecursiveCallPy(tstate);
@ -1108,7 +1114,7 @@ dummy_func(
else {
assert(PyLong_Check(lasti));
_PyErr_SetString(tstate, PyExc_SystemError, "lasti is not an int");
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
assert(exc && PyExceptionInstance_Check(exc));
@ -1184,7 +1190,7 @@ dummy_func(
if (ns == NULL) {
_PyErr_Format(tstate, PyExc_SystemError,
"no locals when deleting %R", name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
err = PyObject_DelItem(ns, name);
// Can't use ERROR_IF here.
@ -1192,7 +1198,7 @@ dummy_func(
_PyEval_FormatExcCheckArg(tstate, PyExc_NameError,
NAME_ERROR_MSG,
name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
@ -1312,12 +1318,12 @@ dummy_func(
int err = PyDict_Pop(GLOBALS(), name, NULL);
// Can't use ERROR_IF here.
if (err < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (err == 0) {
_PyEval_FormatExcCheckArg(tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
@ -1334,21 +1340,21 @@ dummy_func(
inst(LOAD_FROM_DICT_OR_GLOBALS, (mod_or_class_dict -- v)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
if (PyMapping_GetOptionalItem(mod_or_class_dict, name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
_PyEval_FormatExcCheckArg(
tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
}
@ -1364,21 +1370,21 @@ dummy_func(
}
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
if (PyMapping_GetOptionalItem(mod_or_class_dict, name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (v == NULL) {
_PyEval_FormatExcCheckArg(
tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
}
@ -1494,7 +1500,13 @@ dummy_func(
inst(DELETE_FAST, (--)) {
PyObject *v = GETLOCAL(oparg);
ERROR_IF(v == NULL, unbound_local_error);
if (v == NULL) {
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
ERROR_IF(1, error);
}
SETLOCAL(oparg, NULL);
}
@ -1504,7 +1516,7 @@ dummy_func(
PyObject *initial = GETLOCAL(oparg);
PyObject *cell = PyCell_New(initial);
if (cell == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
SETLOCAL(oparg, cell);
}
@ -1516,7 +1528,7 @@ dummy_func(
// Fortunately we don't need its superpower.
if (oldobj == NULL) {
_PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
GOTO_ERROR(error);
ERROR_NO_POP();
}
PyCell_SET(cell, NULL);
Py_DECREF(oldobj);
@ -1528,14 +1540,14 @@ dummy_func(
assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus);
name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg);
if (PyMapping_GetOptionalItem(class_dict, name, &value) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
if (!value) {
PyObject *cell = GETLOCAL(oparg);
value = PyCell_GET(cell);
if (value == NULL) {
_PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
GOTO_ERROR(error);
ERROR_NO_POP();
}
Py_INCREF(value);
}
@ -1615,7 +1627,7 @@ dummy_func(
inst(BUILD_SET, (values[oparg] -- set)) {
set = PySet_New(NULL);
if (set == NULL)
GOTO_ERROR(error);
ERROR_NO_POP();
int err = 0;
for (int i = 0; i < oparg; i++) {
PyObject *item = values[i];
@ -1662,12 +1674,8 @@ dummy_func(
}
inst(BUILD_CONST_KEY_MAP, (values[oparg], keys -- map)) {
if (!PyTuple_CheckExact(keys) ||
PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) {
_PyErr_SetString(tstate, PyExc_SystemError,
"bad BUILD_CONST_KEY_MAP keys argument");
GOTO_ERROR(error); // Pop the keys and values.
}
assert(PyTuple_CheckExact(keys));
assert(PyTuple_GET_SIZE(keys) == (Py_ssize_t)oparg);
map = _PyDict_FromItems(
&PyTuple_GET_ITEM(keys, 0), 1,
values, 1, oparg);
@ -2502,7 +2510,7 @@ dummy_func(
_PyErr_SetString(tstate, PyExc_TypeError,
"cannot 'yield from' a coroutine object "
"in a non-coroutine generator");
GOTO_ERROR(error);
ERROR_NO_POP();
}
iter = iterable;
}
@ -2513,7 +2521,7 @@ dummy_func(
/* `iterable` is not a generator. */
iter = PyObject_GetIter(iterable);
if (iter == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
DECREF_INPUTS();
}
@ -2550,7 +2558,7 @@ dummy_func(
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
monitor_raise(tstate, frame, this_instr);
_PyErr_Clear(tstate);
@ -2573,7 +2581,7 @@ dummy_func(
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
_PyErr_Clear(tstate);
}
@ -2599,7 +2607,7 @@ dummy_func(
else {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
monitor_raise(tstate, frame, this_instr);
_PyErr_Clear(tstate);
@ -2779,7 +2787,7 @@ dummy_func(
"asynchronous context manager protocol",
Py_TYPE(mgr)->tp_name);
}
GOTO_ERROR(error);
ERROR_NO_POP();
}
exit = _PyObject_LookupSpecial(mgr, &_Py_ID(__aexit__));
if (exit == NULL) {
@ -2791,7 +2799,7 @@ dummy_func(
Py_TYPE(mgr)->tp_name);
}
Py_DECREF(enter);
GOTO_ERROR(error);
ERROR_NO_POP();
}
DECREF_INPUTS();
res = PyObject_CallNoArgs(enter);
@ -2814,7 +2822,7 @@ dummy_func(
"context manager protocol",
Py_TYPE(mgr)->tp_name);
}
GOTO_ERROR(error);
ERROR_NO_POP();
}
exit = _PyObject_LookupSpecial(mgr, &_Py_ID(__exit__));
if (exit == NULL) {
@ -2826,7 +2834,7 @@ dummy_func(
Py_TYPE(mgr)->tp_name);
}
Py_DECREF(enter);
GOTO_ERROR(error);
ERROR_NO_POP();
}
DECREF_INPUTS();
res = PyObject_CallNoArgs(enter);
@ -3075,7 +3083,7 @@ dummy_func(
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
if (new_frame == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
@ -3298,7 +3306,7 @@ dummy_func(
STAT_INC(CALL, hit);
PyObject *self = _PyType_NewManagedObject(tp);
if (self == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
Py_DECREF(tp);
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
@ -3335,7 +3343,7 @@ dummy_func(
PyErr_Format(PyExc_TypeError,
"__init__() should return None, not '%.200s'",
Py_TYPE(should_be_none)->tp_name);
GOTO_ERROR(error);
ERROR_NO_POP();
}
}
@ -3472,7 +3480,7 @@ dummy_func(
PyObject *arg = args[0];
Py_ssize_t len_i = PyObject_Length(arg);
if (len_i < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
res = PyLong_FromSsize_t(len_i);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
@ -3498,7 +3506,7 @@ dummy_func(
PyObject *inst = args[0];
int retval = PyObject_IsInstance(inst, cls);
if (retval < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
res = PyBool_FromLong(retval);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
@ -3712,7 +3720,7 @@ dummy_func(
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
if (new_frame == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
assert(next_instr - this_instr == 1);
frame->return_offset = 1;
@ -3760,11 +3768,11 @@ dummy_func(
assert(kwargs == NULL || PyDict_CheckExact(kwargs));
if (!PyTuple_CheckExact(callargs)) {
if (check_args_iterable(tstate, func, callargs) < 0) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
PyObject *tuple = PySequence_Tuple(callargs);
if (tuple == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
Py_SETREF(callargs, tuple);
}
@ -3776,7 +3784,7 @@ dummy_func(
int err = _Py_call_instrumentation_2args(
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, func, arg);
if (err) GOTO_ERROR(error);
if (err) ERROR_NO_POP();
result = PyObject_Call(func, callargs, kwargs);
if (!PyFunction_Check(func) && !PyMethod_Check(func)) {
@ -3810,7 +3818,7 @@ dummy_func(
// Need to manually shrink the stack since we exit with DISPATCH_INLINED.
STACK_SHRINK(oparg + 3);
if (new_frame == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
assert(next_instr - this_instr == 1);
frame->return_offset = 1;
@ -3831,7 +3839,7 @@ dummy_func(
Py_DECREF(codeobj);
if (func_obj == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
_PyFunction_SetVersion(
@ -3871,7 +3879,7 @@ dummy_func(
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
if (gen == NULL) {
GOTO_ERROR(error);
ERROR_NO_POP();
}
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -4169,7 +4177,7 @@ dummy_func(
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
ERROR_IF(1, error);
GOTO_UNWIND();
}
GOTO_TIER_ONE(target);
}
@ -4199,6 +4207,19 @@ dummy_func(
frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr;
}
tier2 op(_DEOPT, (--)) {
EXIT_TO_TIER1();
}
tier2 op(_SIDE_EXIT, (--)) {
EXIT_TO_TRACE();
}
tier2 op(_ERROR_POP_N, (unused[oparg] --)) {
SYNC_SP();
GOTO_UNWIND();
}
// END BYTECODES //
}

View file

@ -642,7 +642,6 @@ int _Py_CheckRecursiveCallPy(
return 0;
}
static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = {
/* Put a NOP at the start, so that the IP points into
* the code, rather than before it */
@ -850,15 +849,6 @@ resume_frame:
or goto error. */
Py_UNREACHABLE();
unbound_local_error:
{
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
goto error;
}
pop_4_error:
STACK_SHRINK(1);
pop_3_error:
@ -980,12 +970,6 @@ enter_tier_two:
#undef GOTO_ERROR
#define GOTO_ERROR(LABEL) goto LABEL ## _tier_two
#undef DEOPT_IF
#define DEOPT_IF(COND, INSTNAME) \
if ((COND)) { \
goto deoptimize;\
}
#ifdef Py_STATS
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
#undef STAT_INC
@ -1013,6 +997,7 @@ enter_tier_two:
#endif
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
tier2_dispatch:
for (;;) {
uopcode = next_uop->opcode;
#ifdef Py_DEBUG
@ -1054,24 +1039,7 @@ enter_tier_two:
}
}
// Jump here from ERROR_IF(..., unbound_local_error)
unbound_local_error_tier_two:
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
goto error_tier_two;
// JUMP to any of these from ERROR_IF(..., error)
pop_4_error_tier_two:
STACK_SHRINK(1);
pop_3_error_tier_two:
STACK_SHRINK(1);
pop_2_error_tier_two:
STACK_SHRINK(1);
pop_1_error_tier_two:
STACK_SHRINK(1);
error_tier_two:
jump_to_error_target:
#ifdef Py_DEBUG
if (lltrace >= 2) {
printf("Error: [UOp ");
@ -1081,15 +1049,28 @@ error_tier_two:
_PyOpcode_OpName[frame->instr_ptr->op.code]);
}
#endif
assert (next_uop[-1].format == UOP_FORMAT_JUMP);
uint16_t target = uop_get_error_target(&next_uop[-1]);
next_uop = current_executor->trace + target;
goto tier2_dispatch;
error_tier_two:
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
frame->return_offset = 0; // Don't leave this random
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(current_executor);
tstate->previous_executor = NULL;
goto resume_with_error;
// Jump here from DEOPT_IF()
deoptimize:
jump_to_jump_target:
assert(next_uop[-1].format == UOP_FORMAT_JUMP);
target = uop_get_jump_target(&next_uop[-1]);
next_uop = current_executor->trace + target;
goto tier2_dispatch;
exit_to_tier1:
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
#ifdef Py_DEBUG
if (lltrace >= 2) {
@ -1105,8 +1086,8 @@ deoptimize:
tstate->previous_executor = NULL;
DISPATCH();
// Jump here from EXIT_IF()
side_exit:
exit_to_trace:
assert(next_uop[-1].format == UOP_FORMAT_EXIT);
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
UOP_STAT_INC(uopcode, miss);
uint32_t exit_index = next_uop[-1].exit_index;

View file

@ -423,3 +423,9 @@ do { \
#define CURRENT_OPARG() (next_uop[-1].oparg)
#define CURRENT_OPERAND() (next_uop[-1].operand)
#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
#define JUMP_TO_ERROR() goto jump_to_error_target
#define GOTO_UNWIND() goto error_tier_two
#define EXIT_TO_TRACE() goto exit_to_trace
#define EXIT_TO_TIER1() goto exit_to_tier1

File diff suppressed because it is too large Load diff

View file

@ -25,7 +25,7 @@
"asynchronous context manager protocol",
Py_TYPE(mgr)->tp_name);
}
GOTO_ERROR(error);
goto error;
}
exit = _PyObject_LookupSpecial(mgr, &_Py_ID(__aexit__));
if (exit == NULL) {
@ -37,7 +37,7 @@
Py_TYPE(mgr)->tp_name);
}
Py_DECREF(enter);
GOTO_ERROR(error);
goto error;
}
Py_DECREF(mgr);
res = PyObject_CallNoArgs(enter);
@ -71,7 +71,7 @@
"context manager protocol",
Py_TYPE(mgr)->tp_name);
}
GOTO_ERROR(error);
goto error;
}
exit = _PyObject_LookupSpecial(mgr, &_Py_ID(__exit__));
if (exit == NULL) {
@ -83,7 +83,7 @@
Py_TYPE(mgr)->tp_name);
}
Py_DECREF(enter);
GOTO_ERROR(error);
goto error;
}
Py_DECREF(mgr);
res = PyObject_CallNoArgs(enter);
@ -605,12 +605,8 @@
PyObject *map;
keys = stack_pointer[-1];
values = &stack_pointer[-1 - oparg];
if (!PyTuple_CheckExact(keys) ||
PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) {
_PyErr_SetString(tstate, PyExc_SystemError,
"bad BUILD_CONST_KEY_MAP keys argument");
GOTO_ERROR(error); // Pop the keys and values.
}
assert(PyTuple_CheckExact(keys));
assert(PyTuple_GET_SIZE(keys) == (Py_ssize_t)oparg);
map = _PyDict_FromItems(
&PyTuple_GET_ITEM(keys, 0), 1,
values, 1, oparg);
@ -667,7 +663,7 @@
values = &stack_pointer[-oparg];
set = PySet_New(NULL);
if (set == NULL)
GOTO_ERROR(error);
goto error;
int err = 0;
for (int i = 0; i < oparg; i++) {
PyObject *item = values[i];
@ -808,7 +804,7 @@
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
if (new_frame == NULL) {
GOTO_ERROR(error);
goto error;
}
frame->return_offset = (uint16_t)(next_instr - this_instr);
DISPATCH_INLINED(new_frame);
@ -882,7 +878,7 @@
STAT_INC(CALL, hit);
PyObject *self = _PyType_NewManagedObject(tp);
if (self == NULL) {
GOTO_ERROR(error);
goto error;
}
Py_DECREF(tp);
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
@ -1213,11 +1209,11 @@
assert(kwargs == NULL || PyDict_CheckExact(kwargs));
if (!PyTuple_CheckExact(callargs)) {
if (check_args_iterable(tstate, func, callargs) < 0) {
GOTO_ERROR(error);
goto error;
}
PyObject *tuple = PySequence_Tuple(callargs);
if (tuple == NULL) {
GOTO_ERROR(error);
goto error;
}
Py_SETREF(callargs, tuple);
}
@ -1229,7 +1225,7 @@
int err = _Py_call_instrumentation_2args(
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, func, arg);
if (err) GOTO_ERROR(error);
if (err) goto error;
result = PyObject_Call(func, callargs, kwargs);
if (!PyFunction_Check(func) && !PyMethod_Check(func)) {
if (result == NULL) {
@ -1261,7 +1257,7 @@
// Need to manually shrink the stack since we exit with DISPATCH_INLINED.
STACK_SHRINK(oparg + 3);
if (new_frame == NULL) {
GOTO_ERROR(error);
goto error;
}
assert(next_instr - this_instr == 1);
frame->return_offset = 1;
@ -1342,7 +1338,7 @@
PyObject *inst = args[0];
int retval = PyObject_IsInstance(inst, cls);
if (retval < 0) {
GOTO_ERROR(error);
goto error;
}
res = PyBool_FromLong(retval);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
@ -1407,7 +1403,7 @@
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
if (new_frame == NULL) {
GOTO_ERROR(error);
goto error;
}
assert(next_instr - this_instr == 1);
frame->return_offset = 1;
@ -1475,7 +1471,7 @@
PyObject *arg = args[0];
Py_ssize_t len_i = PyObject_Length(arg);
if (len_i < 0) {
GOTO_ERROR(error);
goto error;
}
res = PyLong_FromSsize_t(len_i);
assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
@ -2329,7 +2325,7 @@
// Fortunately we don't need its superpower.
if (oldobj == NULL) {
_PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
GOTO_ERROR(error);
goto error;
}
PyCell_SET(cell, NULL);
Py_DECREF(oldobj);
@ -2341,7 +2337,13 @@
next_instr += 1;
INSTRUCTION_STATS(DELETE_FAST);
PyObject *v = GETLOCAL(oparg);
if (v == NULL) goto unbound_local_error;
if (v == NULL) {
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
if (1) goto error;
}
SETLOCAL(oparg, NULL);
DISPATCH();
}
@ -2354,12 +2356,12 @@
int err = PyDict_Pop(GLOBALS(), name, NULL);
// Can't use ERROR_IF here.
if (err < 0) {
GOTO_ERROR(error);
goto error;
}
if (err == 0) {
_PyEval_FormatExcCheckArg(tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
goto error;
}
DISPATCH();
}
@ -2374,7 +2376,7 @@
if (ns == NULL) {
_PyErr_Format(tstate, PyExc_SystemError,
"no locals when deleting %R", name);
GOTO_ERROR(error);
goto error;
}
err = PyObject_DelItem(ns, name);
// Can't use ERROR_IF here.
@ -2382,7 +2384,7 @@
_PyEval_FormatExcCheckArg(tstate, PyExc_NameError,
NAME_ERROR_MSG,
name);
GOTO_ERROR(error);
goto error;
}
DISPATCH();
}
@ -2523,7 +2525,7 @@
PyErr_Format(PyExc_TypeError,
"__init__() should return None, not '%.200s'",
Py_TYPE(should_be_none)->tp_name);
GOTO_ERROR(error);
goto error;
}
stack_pointer += -1;
DISPATCH();
@ -2610,7 +2612,7 @@
if (next == NULL) {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
goto error;
}
monitor_raise(tstate, frame, this_instr);
_PyErr_Clear(tstate);
@ -2841,7 +2843,7 @@
if (PyAsyncGen_CheckExact(aiter)) {
awaitable = type->tp_as_async->am_anext(aiter);
if (awaitable == NULL) {
GOTO_ERROR(error);
goto error;
}
} else {
if (type->tp_as_async != NULL){
@ -2850,7 +2852,7 @@
if (getter != NULL) {
next_iter = (*getter)(aiter);
if (next_iter == NULL) {
GOTO_ERROR(error);
goto error;
}
}
else {
@ -2858,7 +2860,7 @@
"'async for' requires an iterator with "
"__anext__ method, got %.100s",
type->tp_name);
GOTO_ERROR(error);
goto error;
}
awaitable = _PyCoro_GetAwaitableIter(next_iter);
if (awaitable == NULL) {
@ -2868,7 +2870,7 @@
"from __anext__: %.100s",
Py_TYPE(next_iter)->tp_name);
Py_DECREF(next_iter);
GOTO_ERROR(error);
goto error;
} else {
Py_DECREF(next_iter);
}
@ -2956,7 +2958,7 @@
_PyErr_SetString(tstate, PyExc_TypeError,
"cannot 'yield from' a coroutine object "
"in a non-coroutine generator");
GOTO_ERROR(error);
goto error;
}
iter = iterable;
}
@ -2967,7 +2969,7 @@
/* `iterable` is not a generator. */
iter = PyObject_GetIter(iterable);
if (iter == NULL) {
GOTO_ERROR(error);
goto error;
}
Py_DECREF(iterable);
}
@ -3066,7 +3068,7 @@
if (PyGen_Check(receiver)) {
PyErr_SetObject(PyExc_StopIteration, value);
if (monitor_stop_iteration(tstate, frame, this_instr)) {
GOTO_ERROR(error);
goto error;
}
PyErr_SetRaisedException(NULL);
}
@ -3087,7 +3089,7 @@
if (PyGen_Check(receiver) || PyCoro_CheckExact(receiver)) {
PyErr_SetObject(PyExc_StopIteration, value);
if (monitor_stop_iteration(tstate, frame, this_instr)) {
GOTO_ERROR(error);
goto error;
}
PyErr_SetRaisedException(NULL);
}
@ -3113,7 +3115,7 @@
else {
if (_PyErr_Occurred(tstate)) {
if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
GOTO_ERROR(error);
goto error;
}
monitor_raise(tstate, frame, this_instr);
_PyErr_Clear(tstate);
@ -3268,7 +3270,7 @@
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
if (code_version != global_version) {
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {
GOTO_ERROR(error);
goto error;
}
next_instr = this_instr;
}
@ -3299,7 +3301,7 @@
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_RETURN,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) goto error;
Py_INCREF(retval);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -3324,7 +3326,7 @@
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_RETURN,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) goto error;
STACK_SHRINK(1);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -3356,7 +3358,7 @@
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_YIELD,
frame, this_instr, retval);
if (err) GOTO_ERROR(error);
if (err) goto error;
tstate->exc_info = gen->gi_exc_state.previous_item;
gen->gi_exc_state.previous_item = NULL;
_Py_LeaveRecursiveCallPy(tstate);
@ -4138,7 +4140,13 @@
INSTRUCTION_STATS(LOAD_FAST_CHECK);
PyObject *value;
value = GETLOCAL(oparg);
if (value == NULL) goto unbound_local_error;
if (value == NULL) {
_PyEval_FormatExcCheckArg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)
);
if (1) goto error;
}
Py_INCREF(value);
stack_pointer[0] = value;
stack_pointer += 1;
@ -4175,14 +4183,14 @@
assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus);
name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg);
if (PyMapping_GetOptionalItem(class_dict, name, &value) < 0) {
GOTO_ERROR(error);
goto error;
}
if (!value) {
PyObject *cell = GETLOCAL(oparg);
value = PyCell_GET(cell);
if (value == NULL) {
_PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg);
GOTO_ERROR(error);
goto error;
}
Py_INCREF(value);
}
@ -4200,21 +4208,21 @@
mod_or_class_dict = stack_pointer[-1];
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
if (PyMapping_GetOptionalItem(mod_or_class_dict, name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
_PyEval_FormatExcCheckArg(
tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
goto error;
}
}
}
@ -4398,21 +4406,21 @@
}
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
if (PyMapping_GetOptionalItem(mod_or_class_dict, name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) {
GOTO_ERROR(error);
goto error;
}
if (v == NULL) {
_PyEval_FormatExcCheckArg(
tstate, PyExc_NameError,
NAME_ERROR_MSG, name);
GOTO_ERROR(error);
goto error;
}
}
}
@ -4574,7 +4582,7 @@
PyObject *initial = GETLOCAL(oparg);
PyObject *cell = PyCell_New(initial);
if (cell == NULL) {
GOTO_ERROR(error);
goto error;
}
SETLOCAL(oparg, cell);
DISPATCH();
@ -4591,7 +4599,7 @@
PyFunction_New(codeobj, GLOBALS());
Py_DECREF(codeobj);
if (func_obj == NULL) {
GOTO_ERROR(error);
goto error;
}
_PyFunction_SetVersion(
func_obj, ((PyCodeObject *)codeobj)->co_version);
@ -4910,7 +4918,7 @@
else {
assert(PyLong_Check(lasti));
_PyErr_SetString(tstate, PyExc_SystemError, "lasti is not an int");
GOTO_ERROR(error);
goto error;
}
}
assert(exc && PyExceptionInstance_Check(exc));
@ -5017,7 +5025,7 @@
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
if (gen == NULL) {
GOTO_ERROR(error);
goto error;
}
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
@ -5126,7 +5134,7 @@
JUMPBY(oparg);
}
else {
GOTO_ERROR(error);
goto error;
}
}
Py_DECREF(v);

View file

@ -381,11 +381,13 @@ int
_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length)
{
// Loop once to find the total compiled size:
size_t code_size = 0;
size_t data_size = 0;
uint32_t instruction_starts[UOP_MAX_TRACE_LENGTH];
uint32_t code_size = 0;
uint32_t data_size = 0;
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
instruction_starts[i] = code_size;
code_size += group->code.body_size;
data_size += group->data.body_size;
}
@ -403,11 +405,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
// Loop again to emit the code:
unsigned char *code = memory;
unsigned char *data = memory + code_size;
unsigned char *top = code;
if (trace[0].opcode == _START_EXECUTOR) {
// Don't want to execute this more than once:
top += stencil_groups[_START_EXECUTOR].code.body_size;
}
assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
@ -419,8 +417,29 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
patches[HoleValue_OPARG] = instruction->oparg;
patches[HoleValue_OPERAND] = instruction->operand;
patches[HoleValue_TARGET] = instruction->target;
patches[HoleValue_TOP] = (uint64_t)top;
switch (instruction->format) {
case UOP_FORMAT_TARGET:
patches[HoleValue_TARGET] = instruction->target;
break;
case UOP_FORMAT_EXIT:
assert(instruction->exit_index < executor->exit_count);
patches[HoleValue_EXIT_INDEX] = instruction->exit_index;
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
}
break;
case UOP_FORMAT_JUMP:
assert(instruction->jump_target < length);
patches[HoleValue_JUMP_TARGET] = (uint64_t)memory + instruction_starts[instruction->jump_target];
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
}
break;
default:
assert(0);
Py_FatalError("Illegal instruction format");
}
patches[HoleValue_TOP] = (uint64_t)memory + instruction_starts[1];
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;

View file

@ -154,13 +154,19 @@ PyUnstable_GetOptimizer(void)
}
static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies);
make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies);
static int
init_cold_exit_executor(_PyExecutorObject *executor, int oparg);
/* It is impossible for the number of exits to reach 1/4 of the total length,
* as the number of exits cannot reach 1/3 of the number of non-exits, due to
* the presence of CHECK_VALIDITY checks and instructions to produce the values
* being checked in exits. */
#define COLD_EXIT_COUNT (UOP_MAX_TRACE_LENGTH/4)
static int cold_exits_initialized = 0;
static _PyExecutorObject COLD_EXITS[UOP_MAX_TRACE_LENGTH] = { 0 };
static _PyExecutorObject COLD_EXITS[COLD_EXIT_COUNT] = { 0 };
static const _PyBloomFilter EMPTY_FILTER = { 0 };
@ -172,7 +178,7 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
}
else if (cold_exits_initialized == 0) {
cold_exits_initialized = 1;
for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
for (int i = 0; i < COLD_EXIT_COUNT; i++) {
if (init_cold_exit_executor(&COLD_EXITS[i], i)) {
return NULL;
}
@ -313,10 +319,33 @@ _PyUOpPrint(const _PyUOpInstruction *uop)
else {
printf("%s", name);
}
printf(" (%d, target=%d, operand=%#" PRIx64 ")",
uop->oparg,
uop->target,
(uint64_t)uop->operand);
switch(uop->format) {
case UOP_FORMAT_TARGET:
printf(" (%d, target=%d, operand=%#" PRIx64,
uop->oparg,
uop->target,
(uint64_t)uop->operand);
break;
case UOP_FORMAT_JUMP:
printf(" (%d, jump_target=%d, operand=%#" PRIx64,
uop->oparg,
uop->jump_target,
(uint64_t)uop->operand);
break;
case UOP_FORMAT_EXIT:
printf(" (%d, exit_index=%d, operand=%#" PRIx64,
uop->oparg,
uop->exit_index,
(uint64_t)uop->operand);
break;
default:
printf(" (%d, Unknown format)", uop->oparg);
}
if (_PyUop_Flags[uop->opcode] & HAS_ERROR_FLAG) {
printf(", error_target=%d", uop->error_target);
}
printf(")");
}
#endif
@ -432,28 +461,36 @@ BRANCH_TO_GUARD[4][2] = {
#endif
// Beware: Macro arg order differs from struct member order
static inline int
add_to_trace(
_PyUOpInstruction *trace,
int trace_length,
uint16_t opcode,
uint16_t oparg,
uint64_t operand,
uint32_t target)
{
trace[trace_length].opcode = opcode;
trace[trace_length].format = UOP_FORMAT_TARGET;
trace[trace_length].target = target;
trace[trace_length].oparg = oparg;
trace[trace_length].operand = operand;
return trace_length + 1;
}
#ifdef Py_DEBUG
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
if (lltrace >= 2) { \
printf("%4d ADD_TO_TRACE: ", trace_length); \
_PyUOpPrint(&trace[trace_length]); \
_PyUOpPrint(&trace[trace_length-1]); \
printf("\n"); \
} \
trace_length++;
}
#else
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
trace[trace_length].target = (TARGET); \
trace[trace_length].operand = (OPERAND); \
trace_length++;
trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET));
#endif
#define INSTR_IP(INSTR, CODE) \
@ -476,8 +513,7 @@ BRANCH_TO_GUARD[4][2] = {
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
OPT_STAT_INC(trace_stack_overflow); \
ADD_TO_TRACE(uop, oparg, operand, target); \
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
trace_length = 0; \
goto done; \
} \
assert(func == NULL || func->func_code == (PyObject *)code); \
@ -495,7 +531,7 @@ BRANCH_TO_GUARD[4][2] = {
assert(func == NULL || func->func_code == (PyObject *)code); \
instr = trace_stack[trace_stack_depth].instr;
/* Returns 1 on success,
/* Returns the length of the trace on success,
* 0 if it failed to produce a worthwhile trace,
* and -1 on an error.
*/
@ -515,7 +551,8 @@ translate_bytecode_to_trace(
_Py_BloomFilter_Add(dependencies, initial_code);
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
// Leave space for possible trailing _EXIT_TRACE
int max_length = buffer_size-2;
struct {
PyFunctionObject *func;
PyCodeObject *code;
@ -538,13 +575,16 @@ translate_bytecode_to_trace(
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
uint32_t target = 0;
top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
RESERVE_RAW(2, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
// Need space for _DEOPT
max_length--;
uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;
@ -582,13 +622,22 @@ top: // Jump here after _PUSH_FRAME or likely branches
continue;
}
else {
if (OPCODE_HAS_DEOPT(opcode)) {
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_EXIT(opcode));
assert(!OPCODE_HAS_DEOPT(opcode));
}
}
if (OPCODE_HAS_EXIT(opcode)) {
// Make space for exit code
max_length--;
}
if (OPCODE_HAS_ERROR(opcode)) {
// Make space for error code
max_length--;
}
switch (opcode) {
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
@ -624,10 +673,10 @@ top: // Jump here after _PUSH_FRAME or likely branches
DPRINTF(2, "Jump likely (%04x = %d bits), continue at byte offset %d\n",
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
instr = target_instr;
ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code));
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(next_instr, code));
goto top;
}
ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code));
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, code));
break;
}
@ -849,7 +898,9 @@ done:
progress_needed ? "no progress" : "too short");
return 0;
}
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
}
DPRINTF(1,
"Created a proto-trace for %s (%s:%d) at byte offset %d -- length %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@ -857,8 +908,8 @@ done:
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
trace_length);
OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
return 1;
OPT_HIST(trace_length, trace_length_hist);
return trace_length;
}
#undef RESERVE
@ -871,43 +922,86 @@ done:
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
#define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31)))
/* Count the number of used uops, and mark them in the bit vector `used`.
* This can be done in a single pass using simple reachability analysis,
* as there are no backward jumps.
* NOPs are excluded from the count.
/* Count the number of unused uops and exits
*/
static int
compute_used(_PyUOpInstruction *buffer, uint32_t *used, int *exit_count_ptr)
count_exits(_PyUOpInstruction *buffer, int length)
{
int count = 0;
int exit_count = 0;
SET_BIT(used, 0);
for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
if (!BIT_IS_SET(used, i)) {
continue;
}
count++;
for (int i = 0; i < length; i++) {
int opcode = buffer[i].opcode;
if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
if (opcode == _SIDE_EXIT) {
exit_count++;
}
if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
continue;
}
/* All other micro-ops fall through, so i+1 is reachable */
SET_BIT(used, i+1);
assert(opcode <= MAX_UOP_ID);
if (_PyUop_Flags[opcode] & HAS_JUMP_FLAG) {
/* Mark target as reachable */
SET_BIT(used, buffer[i].oparg);
}
if (opcode == NOP) {
count--;
UNSET_BIT(used, i);
}
return exit_count;
}
/* Write a single cold-path stub (deopt/side-exit/error) at *inst,
 * using the simple TARGET format with no oparg. */
static void make_exit(_PyUOpInstruction *inst, int opcode, int target)
{
    inst->format = UOP_FORMAT_TARGET;
    inst->opcode = opcode;
    inst->target = target;
    inst->oparg = 0;
}
/* Convert implicit exits, errors and deopts
 * into explicit ones, appending the shared cold-path stubs
 * after the hot part of the trace.
 *
 * Two passes over `buffer`:
 *   1. Compact out _NOPs in place.
 *   2. For each uop that can deopt/exit or raise, materialize (or reuse)
 *      a trailing stub instruction and point the uop at it via
 *      jump_target / error_target, switching its format to JUMP.
 *
 * Returns the new total length (hot uops + appended stubs).
 */
static int
prepare_for_execution(_PyUOpInstruction *buffer, int length)
{
    /* Index and target of the most recently emitted deopt/exit stub,
     * so consecutive uops with the same target share one stub. */
    int32_t current_jump = -1;
    int32_t current_jump_target = -1;
    /* Same sharing scheme for error stubs; an _ERROR_POP_N stub is only
     * reusable if both the target and the pop count match. */
    int32_t current_error = -1;
    int32_t current_error_target = -1;
    int32_t current_popped = -1;
    /* Leaving in NOPs slows down the interpreter and messes up the stats */
    _PyUOpInstruction *copy_to = &buffer[0];
    for (int i = 0; i < length; i++) {
        _PyUOpInstruction *inst = &buffer[i];
        if (inst->opcode != _NOP) {
            if (copy_to != inst) {
                *copy_to = *inst;
            }
            copy_to++;
        }
    }
    /* Recompute length after NOP compaction; stubs are appended from here. */
    length = (int)(copy_to - buffer);
    int next_spare = length;
    for (int i = 0; i < length; i++) {
        _PyUOpInstruction *inst = &buffer[i];
        int opcode = inst->opcode;
        int32_t target = (int32_t)uop_get_target(inst);
        if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
            if (target != current_jump_target) {
                /* EXIT takes precedence: emit _SIDE_EXIT for exits,
                 * _DEOPT for plain deopts. */
                uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ? _SIDE_EXIT : _DEOPT;
                make_exit(&buffer[next_spare], exit_op, target);
                current_jump_target = target;
                current_jump = next_spare;
                next_spare++;
            }
            buffer[i].jump_target = current_jump;
            buffer[i].format = UOP_FORMAT_JUMP;
        }
        if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
            /* ERROR_NO_POP uops leave the stack as-is; otherwise the stub
             * must pop this uop's inputs before unwinding. */
            int popped = (_PyUop_Flags[opcode] & HAS_ERROR_NO_POP_FLAG) ?
                0 : _PyUop_num_popped(opcode, inst->oparg);
            if (target != current_error_target || popped != current_popped) {
                current_popped = popped;
                current_error = next_spare;
                current_error_target = target;
                make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
                buffer[next_spare].oparg = popped;
                next_spare++;
            }
            buffer[i].error_target = current_error;
            /* Any uop carrying an error_target must be in JUMP format;
             * a dummy jump_target of 0 is used if it had none. */
            if (buffer[i].format == UOP_FORMAT_TARGET) {
                buffer[i].format = UOP_FORMAT_JUMP;
                buffer[i].jump_target = 0;
            }
        }
    }
    return next_spare;
}
/* Executor side exits */
@ -926,61 +1020,118 @@ allocate_executor(int exit_count, int length)
return res;
}
#ifdef Py_DEBUG
#define CHECK(PRED) \
if (!(PRED)) { \
printf(#PRED " at %d\n", i); \
assert(0); \
}
/* True if `opcode` never consults its target field
 * (it can neither deopt, side-exit, nor raise). */
static int
target_unused(int opcode)
{
    int flags = _PyUop_Flags[opcode];
    return (flags & (HAS_ERROR_FLAG | HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) == 0;
}
/* Debug-only validation of a finished executor's invariants:
 *   - every exit target fits in 25 bits;
 *   - the trace starts with _START_EXECUTOR or _COLD_EXIT;
 *   - each hot uop's format matches its opcode's flags, and all
 *     jump/error targets stay inside the executor's code;
 *   - the hot section ends with _JUMP_TO_TOP, _EXIT_TRACE or _COLD_EXIT;
 *   - everything after the hot section is cold-path stubs only
 *     (_DEOPT / _SIDE_EXIT / _ERROR_POP_N).
 * Aborts via CHECK (assert) on any violation.
 */
static void
sanity_check(_PyExecutorObject *executor)
{
    for (uint32_t i = 0; i < executor->exit_count; i++) {
        _PyExitData *exit = &executor->exits[i];
        CHECK(exit->target < (1 << 25));
    }
    bool ended = false;
    uint32_t i = 0;
    CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
    /* First loop: walk the hot section until the terminating uop. */
    for (; i < executor->code_size; i++) {
        const _PyUOpInstruction *inst = &executor->trace[i];
        uint16_t opcode = inst->opcode;
        CHECK(opcode <= MAX_UOP_ID);
        CHECK(_PyOpcode_uop_name[opcode] != NULL);
        switch(inst->format) {
            case UOP_FORMAT_TARGET:
                /* TARGET format is only legal for uops that never jump. */
                CHECK(target_unused(opcode));
                break;
            case UOP_FORMAT_EXIT:
                CHECK(opcode == _SIDE_EXIT);
                CHECK(inst->exit_index < executor->exit_count);
                break;
            case UOP_FORMAT_JUMP:
                CHECK(inst->jump_target < executor->code_size);
                break;
            case UOP_FORMAT_UNUSED:
                CHECK(0);
                break;
        }
        if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
            /* Error-raising uops must carry a valid error_target. */
            CHECK(inst->format == UOP_FORMAT_JUMP);
            CHECK(inst->error_target < executor->code_size);
        }
        if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _COLD_EXIT) {
            ended = true;
            i++;
            break;
        }
    }
    CHECK(ended);
    /* Second loop: the remainder must be cold-path stubs only. */
    for (; i < executor->code_size; i++) {
        const _PyUOpInstruction *inst = &executor->trace[i];
        uint16_t opcode = inst->opcode;
        CHECK(
            opcode == _DEOPT ||
            opcode == _SIDE_EXIT ||
            opcode == _ERROR_POP_N);
        if (opcode == _SIDE_EXIT) {
            CHECK(inst->format == UOP_FORMAT_EXIT);
        }
    }
}
#undef CHECK
#endif
/* Makes an executor from a buffer of uops.
* Account for the buffer having gaps and NOPs by computing a "used"
* bit vector and only copying the used uops. Here "used" means reachable
* and not a NOP.
*/
static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies)
make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies)
{
uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 };
int exit_count;
int length = compute_used(buffer, used, &exit_count);
length += 1; // For _START_EXECUTOR
int exit_count = count_exits(buffer, length);
_PyExecutorObject *executor = allocate_executor(exit_count, length);
if (executor == NULL) {
return NULL;
}
OPT_HIST(length, optimized_trace_length_hist);
/* Initialize exits */
assert(exit_count < COLD_EXIT_COUNT);
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = 0;
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length-1];
/* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */
for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) {
if (!BIT_IS_SET(used, i)) {
continue;
}
*dest = buffer[i];
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
assert(buffer[0].opcode == _START_EXECUTOR);
buffer[0].operand = (uint64_t)executor;
for (int i = length-1; i >= 0; i--) {
int opcode = buffer[i].opcode;
if (opcode == _POP_JUMP_IF_FALSE ||
opcode == _POP_JUMP_IF_TRUE)
{
/* The oparg of the target will already have been set to its new offset */
int oparg = dest->oparg;
dest->oparg = buffer[oparg].oparg;
}
if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
dest--;
*dest = buffer[i];
assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE);
if (opcode == _SIDE_EXIT) {
executor->exits[next_exit].target = buffer[i].target;
dest->exit_index = next_exit;
dest->format = UOP_FORMAT_EXIT;
next_exit--;
}
/* Set the oparg to be the destination offset,
* so that we can set the oparg of earlier jumps correctly. */
buffer[i].oparg = (uint16_t)(dest - executor->trace);
dest--;
}
assert(next_exit == -1);
assert(dest == executor->trace);
dest->opcode = _START_EXECUTOR;
assert(dest->opcode == _START_EXECUTOR);
dest->oparg = 0;
dest->target = 0;
dest->operand = (uintptr_t)executor;
_Py_ExecutorInit(executor, dependencies);
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
@ -996,6 +1147,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *depende
printf("\n");
}
}
sanity_check(executor);
#endif
#ifdef _Py_JIT
executor->jit_code = NULL;
@ -1024,6 +1176,9 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
assert(executor->vm_data.bloom.bits[i] == 0);
}
#ifdef Py_DEBUG
sanity_check(executor);
#endif
#ifdef _Py_JIT
executor->jit_code = NULL;
executor->jit_size = 0;
@ -1034,6 +1189,28 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
return 0;
}
#ifdef Py_STATS
/* Returns the effective trace length: the index just past the
 * terminating uop, not counting _NOPs. Trailing exit/error stubs
 * (after the terminator) are never reached and are ignored. */
int effective_trace_length(_PyUOpInstruction *buffer, int length)
{
    int nops = 0;
    int i = 0;
    while (i < length) {
        int op = buffer[i].opcode;
        if (op == _NOP) {
            nops++;
        }
        i++;
        if (op == _EXIT_TRACE ||
            op == _JUMP_TO_TOP ||
            op == _COLD_EXIT) {
            return i - nops;
        }
    }
    Py_FatalError("No terminating instruction");
    Py_UNREACHABLE();
}
#endif
static int
uop_optimize(
_PyOptimizerObject *self,
@ -1046,24 +1223,26 @@ uop_optimize(
_Py_BloomFilter_Init(&dependencies);
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
OPT_STAT_INC(attempts);
int err = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
if (err <= 0) {
int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
if (length <= 0) {
// Error or nothing translated
return err;
return length;
}
assert(length < UOP_MAX_TRACE_LENGTH);
OPT_STAT_INC(traces_created);
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
err = _Py_uop_analyze_and_optimize(frame, buffer,
UOP_MAX_TRACE_LENGTH,
length = _Py_uop_analyze_and_optimize(frame, buffer,
length,
curr_stackentries, &dependencies);
if (err <= 0) {
return err;
if (length <= 0) {
return length;
}
}
assert(err == 1);
assert(length < UOP_MAX_TRACE_LENGTH);
assert(length >= 1);
/* Fix up */
for (int pc = 0; pc < UOP_MAX_TRACE_LENGTH; pc++) {
for (int pc = 0; pc < length; pc++) {
int opcode = buffer[pc].opcode;
int oparg = buffer[pc].oparg;
if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) {
@ -1078,10 +1257,14 @@ uop_optimize(
assert(_PyOpcode_uop_name[buffer[pc].opcode]);
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
}
_PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies);
OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
length = prepare_for_execution(buffer, length);
assert(length <= UOP_MAX_TRACE_LENGTH);
_PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
if (executor == NULL) {
return -1;
}
assert(length <= UOP_MAX_TRACE_LENGTH);
*exec_ptr = executor;
return 1;
}
@ -1156,12 +1339,14 @@ counter_optimize(
return 0;
}
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
_PyUOpInstruction buffer[3] = {
_PyUOpInstruction buffer[5] = {
{ .opcode = _START_EXECUTOR },
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
{ .opcode = _EXIT_TRACE, .jump_target = 4, .format=UOP_FORMAT_JUMP },
{ .opcode = _SIDE_EXIT, .target = (uint32_t)(target - _PyCode_CODE(code)), .format=UOP_FORMAT_TARGET }
};
_PyExecutorObject *executor = make_executor_from_uops(buffer, &EMPTY_FILTER);
_PyExecutorObject *executor = make_executor_from_uops(buffer, 5, &EMPTY_FILTER);
if (executor == NULL) {
return -1;
}

View file

@ -387,9 +387,9 @@ optimize_uops(
ctx->curr_frame_depth++;
ctx->frame = frame;
for (_PyUOpInstruction *this_instr = trace;
this_instr < trace + trace_len && !op_is_end(this_instr->opcode);
this_instr++) {
_PyUOpInstruction *this_instr = NULL;
for (int i = 0; i < trace_len; i++) {
this_instr = &trace[i];
int oparg = this_instr->oparg;
opcode = this_instr->opcode;
@ -416,9 +416,8 @@ optimize_uops(
ctx->frame->stack_pointer = stack_pointer;
assert(STACK_LEVEL() >= 0);
}
_Py_uop_abstractcontext_fini(ctx);
return 1;
return trace_len;
out_of_space:
DPRINTF(3, "\n");
@ -447,11 +446,11 @@ done:
/* Cannot optimize further, but there would be no benefit
* in retrying later */
_Py_uop_abstractcontext_fini(ctx);
return 1;
return trace_len;
}
static void
static int
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
{
/* Remove _SET_IP and _CHECK_VALIDITY where possible.
@ -506,7 +505,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
}
case _JUMP_TO_TOP:
case _EXIT_TRACE:
return;
return pc + 1;
default:
{
bool needs_ip = false;
@ -530,6 +529,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
}
}
}
Py_FatalError("No terminating instruction");
Py_UNREACHABLE();
}
static void
@ -582,43 +583,36 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// 1 - optimizer success
// > 0 - length of optimized trace
int
_Py_uop_analyze_and_optimize(
_PyInterpreterFrame *frame,
_PyUOpInstruction *buffer,
int buffer_size,
int length,
int curr_stacklen,
_PyBloomFilter *dependencies
)
{
OPT_STAT_INC(optimizer_attempts);
int err = remove_globals(frame, buffer, buffer_size, dependencies);
if (err == 0) {
goto not_ready;
}
if (err < 0) {
goto error;
int err = remove_globals(frame, buffer, length, dependencies);
if (err <= 0) {
return err;
}
peephole_opt(frame, buffer, buffer_size);
peephole_opt(frame, buffer, length);
err = optimize_uops(
length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
buffer_size, curr_stacklen, dependencies);
length, curr_stacklen, dependencies);
if (err == 0) {
goto not_ready;
if (length <= 0) {
return length;
}
assert(err == 1);
remove_unneeded_uops(buffer, buffer_size);
length = remove_unneeded_uops(buffer, length);
assert(length > 0);
OPT_STAT_INC(optimizer_successes);
return 1;
not_ready:
return 0;
error:
return -1;
return length;
}

View file

@ -769,14 +769,7 @@
break;
}
case _LOAD_NAME: {
_Py_UopsSymbol *v;
v = sym_new_not_null(ctx);
if (v == NULL) goto out_of_space;
stack_pointer[0] = v;
stack_pointer += 1;
break;
}
/* _LOAD_NAME is not a viable micro-op for tier 2 */
case _LOAD_GLOBAL: {
_Py_UopsSymbol *res;
@ -900,14 +893,7 @@
break;
}
case _BUILD_SET: {
_Py_UopsSymbol *set;
set = sym_new_not_null(ctx);
if (set == NULL) goto out_of_space;
stack_pointer[-oparg] = set;
stack_pointer += 1 - oparg;
break;
}
/* _BUILD_SET is not a viable micro-op for tier 2 */
case _BUILD_MAP: {
_Py_UopsSymbol *map;
@ -1408,31 +1394,9 @@
/* _FOR_ITER_GEN is not a viable micro-op for tier 2 */
case _BEFORE_ASYNC_WITH: {
_Py_UopsSymbol *exit;
_Py_UopsSymbol *res;
exit = sym_new_not_null(ctx);
if (exit == NULL) goto out_of_space;
res = sym_new_not_null(ctx);
if (res == NULL) goto out_of_space;
stack_pointer[-1] = exit;
stack_pointer[0] = res;
stack_pointer += 1;
break;
}
/* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */
case _BEFORE_WITH: {
_Py_UopsSymbol *exit;
_Py_UopsSymbol *res;
exit = sym_new_not_null(ctx);
if (exit == NULL) goto out_of_space;
res = sym_new_not_null(ctx);
if (res == NULL) goto out_of_space;
stack_pointer[-1] = exit;
stack_pointer[0] = res;
stack_pointer += 1;
break;
}
/* _BEFORE_WITH is not a viable micro-op for tier 2 */
case _WITH_EXCEPT_START: {
_Py_UopsSymbol *res;
@ -2029,3 +1993,16 @@
break;
}
case _DEOPT: {
break;
}
case _SIDE_EXIT: {
break;
}
case _ERROR_POP_N: {
stack_pointer += -oparg;
break;
}