GH-133231: Changes to executor management to support proposed sys._jit module (GH-133287)

* Track the current executor, not the previous one, on the thread-state. 

* Batch executors for deallocation to avoid having to constantly incref executors; this is an ad-hoc form of deferred reference counting.
This commit is contained in:
Mark Shannon 2025-05-04 10:05:35 +01:00 committed by GitHub
parent 1d9406e426
commit ac7d5ba96e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 176 additions and 54 deletions

View file

@ -194,7 +194,7 @@ struct _ts {
/* The thread's exception stack entry. (Always the last entry.) */
_PyErr_StackItem exc_state;
PyObject *previous_executor;
PyObject *current_executor;
uint64_t dict_global_version;

View file

@ -923,6 +923,8 @@ struct _is {
PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit;
struct _PyExecutorObject *executor_list_head;
struct _PyExecutorObject *executor_deletion_list_head;
int executor_deletion_list_remaining_capacity;
size_t trace_run_counter;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;

View file

@ -1195,7 +1195,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
[INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
[INTERPRETER_EXIT] = { true, INSTR_FMT_IX, 0 },
[INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
[IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
[JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },

View file

@ -69,7 +69,7 @@ typedef struct {
typedef struct {
uint32_t target;
_Py_BackoffCounter temperature;
const struct _PyExecutorObject *executor;
struct _PyExecutorObject *executor;
} _PyExitData;
typedef struct _PyExecutorObject {
@ -84,6 +84,10 @@ typedef struct _PyExecutorObject {
_PyExitData exits[1];
} _PyExecutorObject;
/* If pending deletion list gets large enough, then scan,
* and free any executors that aren't executing
* i.e. any that aren't a thread's current_executor. */
#define EXECUTOR_DELETE_LIST_MAX 100
// Export for '_opcode' shared extension (JIT compiler).
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
@ -304,6 +308,9 @@ static inline int is_terminator(const _PyUOpInstruction *uop)
}
PyAPI_FUNC(int) _PyDumpExecutors(FILE *out);
#ifdef _Py_TIER2
extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp);
#endif
#ifdef __cplusplus
}

View file

@ -301,7 +301,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_CHECK_FUNCTION] = HAS_DEOPT_FLAG,
[_START_EXECUTOR] = HAS_ESCAPES_FLAG,
[_START_EXECUTOR] = 0,
[_MAKE_WARM] = 0,
[_FATAL_ERROR] = 0,
[_DEOPT] = 0,

View file

@ -1169,6 +1169,17 @@ dummy_func(
tstate->current_frame = frame->previous;
assert(!_PyErr_Occurred(tstate));
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
#if !Py_TAIL_CALL_INTERP
assert(frame == &entry.frame);
#endif
#ifdef _Py_TIER2
_PyStackRef executor = frame->localsplus[0];
assert(tstate->current_executor == NULL);
if (!PyStackRef_IsNull(executor)) {
tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
PyStackRef_CLOSE(executor);
}
#endif
LLTRACE_RESUME_FRAME();
return result;
}
@ -2912,8 +2923,7 @@ dummy_func(
}
else {
this_instr[1].counter = initial_jump_backoff_counter();
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
assert(tstate->current_executor == NULL);
GOTO_TIER_TWO(executor);
}
}
@ -2965,7 +2975,7 @@ dummy_func(
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->previous_executor == NULL);
assert(tstate->current_executor == NULL);
/* If the eval breaker is set then stay in tier 1.
* This avoids any potentially infinite loops
* involving _RESUME_CHECK */
@ -2978,8 +2988,6 @@ dummy_func(
}
DISPATCH_GOTO();
}
tstate->previous_executor = Py_None;
Py_INCREF(executor);
GOTO_TIER_TWO(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
@ -5254,7 +5262,6 @@ dummy_func(
exit->temperature = initial_temperature_backoff_counter();
Py_CLEAR(exit->executor);
}
tstate->previous_executor = (PyObject *)current_executor;
if (exit->executor == NULL) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
@ -5277,7 +5284,6 @@ dummy_func(
}
exit->executor = executor;
}
Py_INCREF(exit->executor);
GOTO_TIER_TWO(exit->executor);
}
@ -5316,7 +5322,6 @@ dummy_func(
}
tier2 op(_START_EXECUTOR, (executor/4 --)) {
Py_CLEAR(tstate->previous_executor);
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
@ -5337,12 +5342,10 @@ dummy_func(
}
tier2 op(_DEOPT, (--)) {
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
}
tier2 op(_ERROR_POP_N, (target/2 --)) {
tstate->previous_executor = (PyObject *)current_executor;
assert(oparg == 0);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
SYNC_SP();
@ -5463,6 +5466,17 @@ dummy_func(
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
/* Restore previous frame and exit */
tstate->current_frame = frame->previous;
#if !Py_TAIL_CALL_INTERP
assert(frame == &entry.frame);
#endif
#ifdef _Py_TIER2
_PyStackRef executor = frame->localsplus[0];
assert(tstate->current_executor == NULL);
if (!PyStackRef_IsNull(executor)) {
tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
PyStackRef_CLOSE(executor);
}
#endif
return NULL;
}
next_instr = frame->instr_ptr;

View file

@ -990,6 +990,11 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
#define DONT_SLP_VECTORIZE
#endif
typedef struct {
_PyInterpreterFrame frame;
_PyStackRef stack[1];
} _PyEntryFrame;
PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE
_PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
{
@ -1009,7 +1014,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
int oparg; /* Current opcode argument, if any */
assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL);
#endif
_PyInterpreterFrame entry_frame;
_PyEntryFrame entry;
if (_Py_EnterRecursiveCallTstate(tstate, "")) {
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
@ -1021,30 +1026,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
* These are cached values from the frame and code object. */
_Py_CODEUNIT *next_instr;
_PyStackRef *stack_pointer;
entry_frame.localsplus[0] = PyStackRef_NULL;
entry.stack[0] = PyStackRef_NULL;
#ifdef Py_STACKREF_DEBUG
entry_frame.f_funcobj = PyStackRef_None;
entry.frame.f_funcobj = PyStackRef_None;
#elif defined(Py_DEBUG)
/* Set these to invalid but identifiable values for debugging. */
entry_frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
entry_frame.f_locals = (PyObject*)0xaaa1;
entry_frame.frame_obj = (PyFrameObject*)0xaaa2;
entry_frame.f_globals = (PyObject*)0xaaa3;
entry_frame.f_builtins = (PyObject*)0xaaa4;
entry.frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
entry.frame.f_locals = (PyObject*)0xaaa1;
entry.frame.frame_obj = (PyFrameObject*)0xaaa2;
entry.frame.f_globals = (PyObject*)0xaaa3;
entry.frame.f_builtins = (PyObject*)0xaaa4;
#endif
entry_frame.f_executable = PyStackRef_None;
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry_frame.stackpointer = entry_frame.localsplus;
entry_frame.owner = FRAME_OWNED_BY_INTERPRETER;
entry_frame.visited = 0;
entry_frame.return_offset = 0;
entry.frame.f_executable = PyStackRef_None;
entry.frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry.frame.stackpointer = entry.stack;
entry.frame.owner = FRAME_OWNED_BY_INTERPRETER;
entry.frame.visited = 0;
entry.frame.return_offset = 0;
#ifdef Py_DEBUG
entry_frame.lltrace = 0;
entry.frame.lltrace = 0;
#endif
/* Push frame */
entry_frame.previous = tstate->current_frame;
frame->previous = &entry_frame;
entry.frame.previous = tstate->current_frame;
frame->previous = &entry.frame;
tstate->current_frame = frame;
entry.frame.localsplus[0] = PyStackRef_NULL;
#ifdef _Py_TIER2
if (tstate->current_executor != NULL) {
entry.frame.localsplus[0] = PyStackRef_FromPyObjectNew(tstate->current_executor);
tstate->current_executor = NULL;
}
#endif
/* support for generator.throw() */
if (throwflag) {
@ -1071,9 +1083,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
stack_pointer = _PyFrame_GetStackPointer(frame);
#if Py_TAIL_CALL_INTERP
# if Py_STATS
return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode);
return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode);
# else
return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0);
return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0);
# endif
#else
goto error;

View file

@ -359,12 +359,12 @@ _PyFrame_SetStackPointer(frame, stack_pointer)
do { \
OPT_STAT_INC(traces_executed); \
_PyExecutorObject *_executor = (EXECUTOR); \
tstate->current_executor = (PyObject *)_executor; \
jit_func jitted = _executor->jit_code; \
/* Keep the shim frame alive via the executor: */ \
Py_INCREF(_executor); \
next_instr = jitted(frame, stack_pointer, tstate); \
Py_DECREF(_executor); \
Py_CLEAR(tstate->previous_executor); \
frame = tstate->current_frame; \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) { \
@ -377,7 +377,9 @@ do { \
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
next_uop = (EXECUTOR)->trace; \
_PyExecutorObject *_executor = (EXECUTOR); \
tstate->current_executor = (PyObject *)_executor; \
next_uop = _executor->trace; \
assert(next_uop->opcode == _START_EXECUTOR); \
goto enter_tier_two; \
} while (0)
@ -386,10 +388,11 @@ do { \
#define GOTO_TIER_ONE(TARGET) \
do \
{ \
tstate->current_executor = NULL; \
next_instr = (TARGET); \
assert(tstate->current_executor == NULL); \
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
_PyFrame_SetStackPointer(frame, stack_pointer); \
Py_CLEAR(tstate->previous_executor); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (next_instr == NULL) \
{ \

View file

@ -6969,7 +6969,6 @@
Py_CLEAR(exit->executor);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
tstate->previous_executor = (PyObject *)current_executor;
if (exit->executor == NULL) {
_Py_BackoffCounter temperature = exit->temperature;
if (!backoff_counter_triggers(temperature)) {
@ -6994,7 +6993,6 @@
}
exit->executor = executor;
}
Py_INCREF(exit->executor);
GOTO_TIER_TWO(exit->executor);
break;
}
@ -7098,9 +7096,6 @@
case _START_EXECUTOR: {
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_CLEAR(tstate->previous_executor);
stack_pointer = _PyFrame_GetStackPointer(frame);
#ifndef _Py_JIT
current_executor = (_PyExecutorObject*)executor;
#endif
@ -7123,7 +7118,6 @@
}
case _DEOPT: {
tstate->previous_executor = (PyObject *)current_executor;
GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
break;
}
@ -7131,7 +7125,6 @@
case _ERROR_POP_N: {
oparg = CURRENT_OPARG();
uint32_t target = (uint32_t)CURRENT_OPERAND0();
tstate->previous_executor = (PyObject *)current_executor;
assert(oparg == 0);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
GOTO_TIER_ONE(NULL);

View file

@ -5557,7 +5557,7 @@
assert(executor->vm_data.index == INSTR_OFFSET() - 1);
assert(executor->vm_data.code == code);
assert(executor->vm_data.valid);
assert(tstate->previous_executor == NULL);
assert(tstate->current_executor == NULL);
if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
opcode = executor->vm_data.opcode;
oparg = (oparg & ~255) | executor->vm_data.oparg;
@ -5567,8 +5567,6 @@
}
DISPATCH_GOTO();
}
tstate->previous_executor = Py_None;
Py_INCREF(executor);
GOTO_TIER_TWO(executor);
#else
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
@ -7652,6 +7650,22 @@
tstate->current_frame = frame->previous;
assert(!_PyErr_Occurred(tstate));
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
#if !Py_TAIL_CALL_INTERP
assert(frame == &entry.frame);
#endif
#ifdef _Py_TIER2
_PyStackRef executor = frame->localsplus[0];
assert(tstate->current_executor == NULL);
if (!PyStackRef_IsNull(executor)) {
tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(executor);
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += 1;
}
#endif
LLTRACE_RESUME_FRAME();
return result;
}
@ -7786,8 +7800,7 @@
_PyFrame_SetStackPointer(frame, stack_pointer);
this_instr[1].counter = initial_jump_backoff_counter();
stack_pointer = _PyFrame_GetStackPointer(frame);
assert(tstate->previous_executor == NULL);
tstate->previous_executor = Py_None;
assert(tstate->current_executor == NULL);
GOTO_TIER_TWO(executor);
}
}
@ -12424,6 +12437,17 @@ JUMP_TO_LABEL(error);
frame->return_offset = 0;
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
tstate->current_frame = frame->previous;
#if !Py_TAIL_CALL_INTERP
assert(frame == &entry.frame);
#endif
#ifdef _Py_TIER2
_PyStackRef executor = frame->localsplus[0];
assert(tstate->current_executor == NULL);
if (!PyStackRef_IsNull(executor)) {
tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor);
PyStackRef_CLOSE(executor);
}
#endif
return NULL;
}
next_instr = frame->instr_ptr;

View file

@ -204,16 +204,74 @@ get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
static int executor_clear(PyObject *executor);
static void unlink_executor(_PyExecutorObject *executor);
static void
free_executor(_PyExecutorObject *self)
{
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
PyObject_GC_Del(self);
}
void
_Py_ClearExecutorDeletionList(PyInterpreterState *interp)
{
_PyRuntimeState *runtime = &_PyRuntime;
HEAD_LOCK(runtime);
PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
HEAD_UNLOCK(runtime);
while (ts) {
_PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor;
if (current != NULL) {
/* Anything in this list will be unlinked, so we can reuse the
* linked field as a reachability marker. */
current->vm_data.linked = 1;
}
HEAD_LOCK(runtime);
ts = PyThreadState_Next(ts);
HEAD_UNLOCK(runtime);
}
_PyExecutorObject **prev_to_next_ptr = &interp->executor_deletion_list_head;
_PyExecutorObject *exec = *prev_to_next_ptr;
while (exec != NULL) {
if (exec->vm_data.linked) {
// This executor is currently executing
exec->vm_data.linked = 0;
prev_to_next_ptr = &exec->vm_data.links.next;
}
else {
*prev_to_next_ptr = exec->vm_data.links.next;
free_executor(exec);
}
exec = *prev_to_next_ptr;
}
interp->executor_deletion_list_remaining_capacity = EXECUTOR_DELETE_LIST_MAX;
}
static void
add_to_pending_deletion_list(_PyExecutorObject *self)
{
PyInterpreterState *interp = PyInterpreterState_Get();
self->vm_data.links.next = interp->executor_deletion_list_head;
interp->executor_deletion_list_head = self;
if (interp->executor_deletion_list_remaining_capacity > 0) {
interp->executor_deletion_list_remaining_capacity--;
}
else {
_Py_ClearExecutorDeletionList(interp);
}
}
static void
uop_dealloc(PyObject *op) {
_PyExecutorObject *self = _PyExecutorObject_CAST(op);
_PyObject_GC_UNTRACK(self);
assert(self->vm_data.code == NULL);
unlink_executor(self);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
PyObject_GC_Del(self);
// Once unlinked it becomes impossible to invalidate an executor, so do it here.
self->vm_data.valid = 0;
add_to_pending_deletion_list(self);
}
const char *

View file

@ -678,6 +678,8 @@ init_interpreter(PyInterpreterState *interp,
interp->sys_trace_initialized = false;
interp->jit = false;
interp->executor_list_head = NULL;
interp->executor_deletion_list_head = NULL;
interp->executor_deletion_list_remaining_capacity = 0;
interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
if (interp != &runtime->_main_interpreter) {
/* Fix the self-referential, statically initialized fields. */
@ -902,6 +904,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
Py_CLEAR(interp->after_forkers_child);
#endif
#ifdef _Py_TIER2
_Py_ClearExecutorDeletionList(interp);
#endif
_PyAST_Fini(interp);
_PyWarnings_Fini(interp);
_PyAtExit_Fini(interp);
@ -1570,7 +1576,7 @@ init_threadstate(_PyThreadStateImpl *_tstate,
tstate->datastack_top = NULL;
tstate->datastack_limit = NULL;
tstate->what_event = -1;
tstate->previous_executor = NULL;
tstate->current_executor = NULL;
tstate->dict_global_version = 0;
_tstate->c_stack_soft_limit = UINTPTR_MAX;

View file

@ -50,13 +50,16 @@
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
OPT_STAT_INC(traces_executed); \
jit_func_preserve_none jitted = (EXECUTOR)->jit_side_entry; \
_PyExecutorObject *_executor = (EXECUTOR); \
tstate->current_executor = (PyObject *)_executor; \
jit_func_preserve_none jitted = _executor->jit_side_entry; \
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
} while (0)
#undef GOTO_TIER_ONE
#define GOTO_TIER_ONE(TARGET) \
do { \
tstate->current_executor = NULL; \
_PyFrame_SetStackPointer(frame, stack_pointer); \
return TARGET; \
} while (0)