mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
gh-115999: Implement thread-local bytecode and enable specialization for BINARY_OP
(#123926)
Each thread specializes a thread-local copy of the bytecode, created on the first RESUME, in free-threaded builds. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. Each thread reserves a globally unique index identifying its copy of the bytecode in all co_tlbc arrays at thread creation and releases the index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode that is stored at the end of the code object. This ensures that no bytecode is copied for programs that do not use threads. Thread-local bytecode can be disabled at runtime by providing either -X tlbc=0 or PYTHON_TLBC=0. Disabling thread-local bytecode also disables specialization. Concurrent modifications to the bytecode made by the specializing interpreter and instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently.
This commit is contained in:
parent
e5a4b402ae
commit
2e95c5ba3b
44 changed files with 1510 additions and 255 deletions
|
@ -168,11 +168,11 @@ dummy_func(
|
|||
}
|
||||
|
||||
op(_QUICKEN_RESUME, (--)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
|
||||
tier1 op(_MAYBE_INSTRUMENT, (--)) {
|
||||
|
@ -190,7 +190,26 @@ dummy_func(
|
|||
}
|
||||
}
|
||||
|
||||
op(_LOAD_BYTECODE, (--)) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
ERROR_IF(bytecode == NULL, error);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
macro(RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_QUICKEN_RESUME +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM;
|
||||
|
@ -204,6 +223,10 @@ dummy_func(
|
|||
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
|
||||
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
|
||||
DEOPT_IF(eval_breaker != version);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
DEOPT_IF(frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index);
|
||||
#endif
|
||||
}
|
||||
|
||||
op(_MONITOR_RESUME, (--)) {
|
||||
|
@ -217,6 +240,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
macro(INSTRUMENTED_RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM +
|
||||
_MONITOR_RESUME;
|
||||
|
@ -682,8 +706,8 @@ dummy_func(
|
|||
};
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
|
||||
assert(frame->stackpointer == NULL);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
assert(frame->stackpointer == NULL);
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinarySubscr(container, sub, next_instr);
|
||||
|
@ -1236,7 +1260,7 @@ dummy_func(
|
|||
if (oparg) {
|
||||
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
|
||||
if (PyLong_Check(lasti)) {
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
}
|
||||
else {
|
||||
|
@ -2671,9 +2695,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
|
@ -2681,9 +2703,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
|
@ -3697,7 +3717,7 @@ dummy_func(
|
|||
op(_CREATE_INIT_FRAME, (init[1], self[1], args[oparg] -- init_frame: _PyInterpreterFrame *)) {
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
DEAD(init);
|
||||
|
@ -4593,7 +4613,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
|
||||
|
@ -4601,7 +4621,7 @@ dummy_func(
|
|||
}
|
||||
OPCODE_DEFERRED_INC(BINARY_OP);
|
||||
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
assert(NB_ADD <= oparg);
|
||||
assert(oparg <= NB_INPLACE_XOR);
|
||||
}
|
||||
|
@ -4632,7 +4652,7 @@ dummy_func(
|
|||
int original_opcode = 0;
|
||||
if (tstate->tracing) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
|
||||
next_instr = this_instr;
|
||||
} else {
|
||||
original_opcode = _Py_call_instrumentation_line(
|
||||
|
@ -4687,9 +4707,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4698,9 +4716,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4715,9 +4731,7 @@ dummy_func(
|
|||
PyStackRef_CLOSE(value_stackref);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4815,7 +4829,7 @@ dummy_func(
|
|||
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
|
||||
_PyExitData *exit = (_PyExitData *)exit_p;
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
|
||||
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
|
||||
#if defined(Py_DEBUG) && !defined(_Py_JIT)
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
|
||||
if (lltrace >= 2) {
|
||||
|
@ -4823,7 +4837,7 @@ dummy_func(
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(code)),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
|
@ -4933,7 +4947,7 @@ dummy_func(
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
|
@ -4995,7 +5009,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
tier2 op(_ERROR_POP_N, (target/2, unused[oparg] --)) {
|
||||
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
|
||||
SYNC_SP();
|
||||
GOTO_UNWIND();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue