gh-115999: Implement thread-local bytecode and enable specialization for BINARY_OP
(#123926)
In free-threaded builds, each thread specializes a thread-local copy of the bytecode, created on the first RESUME. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. At thread creation, each thread reserves a globally unique index that identifies its copy of the bytecode in every co_tlbc array, and it releases that index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode, which is stored at the end of the code object; this ensures that no bytecode is copied for programs that do not use threads. Thread-local bytecode can be disabled at runtime by passing -X tlbc=0 or setting PYTHON_TLBC=0; disabling thread-local bytecode also disables specialization. Concurrent modifications to the bytecode made by the specializing interpreter and by instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently.
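
The layout described above can be pictured with the following sketch. It is illustrative only: the type and field names (tlbc_array_t, entries, size) are assumptions for exposition and need not match the structs in the diff below, and the real interpreter also creates a missing per-thread copy on first use rather than simply falling back to the main copy.

#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-in for the per-code-object array of bytecode copies. */
typedef struct {
    ptrdiff_t size;       /* number of slots handed out so far */
    char *entries[];      /* entries[0] always points to the "main" copy */
} tlbc_array_t;

/* Pick the bytecode a thread with the given reserved index should run.
 * Index 0 (or a missing copy) means "use the main copy", which is what
 * single-threaded programs and runs with tlbc disabled always hit. */
static inline char *
select_bytecode(const tlbc_array_t *tlbc, int32_t thread_tlbc_index)
{
    if (thread_tlbc_index <= 0 || thread_tlbc_index >= tlbc->size) {
        return tlbc->entries[0];
    }
    char *copy = tlbc->entries[thread_tlbc_index];
    return copy != NULL ? copy : tlbc->entries[0];
}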
This commit is contained in:
parent e5a4b402ae
commit 2e95c5ba3b
44 changed files with 1510 additions and 255 deletions
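
Besides -X tlbc and PYTHON_TLBC, the change adds a tlbc_enabled field to PyConfig (see the initconfig diff below), so embedders can toggle the feature as well. A minimal sketch, assuming a free-threaded (Py_GIL_DISABLED) build; the embedding calls are the standard initialization API, and only the tlbc_enabled assignment is specific to this change:

#include <Python.h>

int
main(void)
{
    PyConfig config;
    PyConfig_InitPythonConfig(&config);
#ifdef Py_GIL_DISABLED
    config.tlbc_enabled = 0;   /* same effect as -X tlbc=0 or PYTHON_TLBC=0 */
#endif
    PyStatus status = Py_InitializeFromConfig(&config);
    PyConfig_Clear(&config);
    if (PyStatus_Exception(status)) {
        Py_ExitStatusException(status);
    }
    PyRun_SimpleString("import sys; print(sys.version)");
    return Py_FinalizeEx();
}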
|
@ -168,11 +168,11 @@ dummy_func(
|
|||
}
|
||||
|
||||
op(_QUICKEN_RESUME, (--)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
|
||||
tier1 op(_MAYBE_INSTRUMENT, (--)) {
|
||||
|
@ -190,7 +190,26 @@ dummy_func(
|
|||
}
|
||||
}
|
||||
|
||||
op(_LOAD_BYTECODE, (--)) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
ERROR_IF(bytecode == NULL, error);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
macro(RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_QUICKEN_RESUME +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM;
|
||||
|
@ -204,6 +223,10 @@ dummy_func(
|
|||
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
|
||||
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
|
||||
DEOPT_IF(eval_breaker != version);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
DEOPT_IF(frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index);
|
||||
#endif
|
||||
}
|
||||
|
||||
op(_MONITOR_RESUME, (--)) {
|
||||
|
@ -217,6 +240,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
macro(INSTRUMENTED_RESUME) =
|
||||
_LOAD_BYTECODE +
|
||||
_MAYBE_INSTRUMENT +
|
||||
_CHECK_PERIODIC_IF_NOT_YIELD_FROM +
|
||||
_MONITOR_RESUME;
|
||||
|
@ -682,8 +706,8 @@ dummy_func(
|
|||
};
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
|
||||
assert(frame->stackpointer == NULL);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
assert(frame->stackpointer == NULL);
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinarySubscr(container, sub, next_instr);
|
||||
|
@ -1236,7 +1260,7 @@ dummy_func(
|
|||
if (oparg) {
|
||||
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
|
||||
if (PyLong_Check(lasti)) {
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
}
|
||||
else {
|
||||
|
@ -2671,9 +2695,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
|
@ -2681,9 +2703,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
DEAD(cond);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
|
||||
|
@ -3697,7 +3717,7 @@ dummy_func(
|
|||
op(_CREATE_INIT_FRAME, (init[1], self[1], args[oparg] -- init_frame: _PyInterpreterFrame *)) {
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
DEAD(init);
|
||||
|
@ -4593,7 +4613,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
|
||||
|
@ -4601,7 +4621,7 @@ dummy_func(
|
|||
}
|
||||
OPCODE_DEFERRED_INC(BINARY_OP);
|
||||
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
assert(NB_ADD <= oparg);
|
||||
assert(oparg <= NB_INPLACE_XOR);
|
||||
}
|
||||
|
@ -4632,7 +4652,7 @@ dummy_func(
|
|||
int original_opcode = 0;
|
||||
if (tstate->tracing) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
|
||||
next_instr = this_instr;
|
||||
} else {
|
||||
original_opcode = _Py_call_instrumentation_line(
|
||||
|
@ -4687,9 +4707,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4698,9 +4716,7 @@ dummy_func(
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4715,9 +4731,7 @@ dummy_func(
|
|||
PyStackRef_CLOSE(value_stackref);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
}
|
||||
|
||||
|
@ -4815,7 +4829,7 @@ dummy_func(
|
|||
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
|
||||
_PyExitData *exit = (_PyExitData *)exit_p;
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
|
||||
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
|
||||
#if defined(Py_DEBUG) && !defined(_Py_JIT)
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
|
||||
if (lltrace >= 2) {
|
||||
|
@ -4823,7 +4837,7 @@ dummy_func(
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(code)),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
|
@ -4933,7 +4947,7 @@ dummy_func(
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
}
|
||||
#endif
|
||||
|
@ -4995,7 +5009,7 @@ dummy_func(
|
|||
}
|
||||
|
||||
tier2 op(_ERROR_POP_N, (target/2, unused[oparg] --)) {
|
||||
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
|
||||
SYNC_SP();
|
||||
GOTO_UNWIND();
|
||||
}
|
||||
|
|
|
@ -189,7 +189,7 @@ lltrace_instruction(_PyInterpreterFrame *frame,
|
|||
dump_stack(frame, stack_pointer);
|
||||
const char *opname = _PyOpcode_OpName[opcode];
|
||||
assert(opname != NULL);
|
||||
int offset = (int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame)));
|
||||
int offset = (int)(next_instr - _PyFrame_GetBytecode(frame));
|
||||
if (OPCODE_HAS_ARG((int)_PyOpcode_Deopt[opcode])) {
|
||||
printf("%d: %s %d\n", offset * 2, opname, oparg);
|
||||
}
|
||||
|
@ -841,6 +841,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
|||
}
|
||||
/* Because this avoids the RESUME,
|
||||
* we need to update instrumentation */
|
||||
#ifdef Py_GIL_DISABLED
|
||||
/* Load thread-local bytecode */
|
||||
if (frame->tlbc_index != ((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
if (bytecode == NULL) {
|
||||
goto error;
|
||||
}
|
||||
ptrdiff_t off = frame->instr_ptr - _PyFrame_GetBytecode(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
}
|
||||
#endif
|
||||
_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
|
||||
monitor_throw(tstate, frame, frame->instr_ptr);
|
||||
/* TO DO -- Monitor throw entry. */
|
||||
|
@ -983,7 +996,7 @@ exception_unwind:
|
|||
Python main loop. */
|
||||
PyObject *exc = _PyErr_GetRaisedException(tstate);
|
||||
PUSH(PyStackRef_FromPyObjectSteal(exc));
|
||||
next_instr = _PyCode_CODE(_PyFrame_GetCode(frame)) + handler;
|
||||
next_instr = _PyFrame_GetBytecode(frame) + handler;
|
||||
|
||||
if (monitor_handled(tstate, frame, next_instr, exc) < 0) {
|
||||
goto exception_unwind;
|
||||
|
@ -1045,6 +1058,8 @@ enter_tier_two:
|
|||
|
||||
#undef ENABLE_SPECIALIZATION
|
||||
#define ENABLE_SPECIALIZATION 0
|
||||
#undef ENABLE_SPECIALIZATION_FT
|
||||
#define ENABLE_SPECIALIZATION_FT 0
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
#define DPRINTF(level, ...) \
|
||||
|
@ -1139,7 +1154,7 @@ exit_to_tier1_dynamic:
|
|||
goto goto_to_tier1;
|
||||
exit_to_tier1:
|
||||
assert(next_uop[-1].format == UOP_FORMAT_TARGET);
|
||||
next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
|
||||
next_instr = next_uop[-1].target + _PyFrame_GetBytecode(frame);
|
||||
goto_to_tier1:
|
||||
#ifdef Py_DEBUG
|
||||
if (lltrace >= 2) {
|
||||
|
@ -1764,7 +1779,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func,
|
|||
if (frame == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
_PyFrame_Initialize(frame, func, locals, code, 0, previous);
|
||||
_PyFrame_Initialize(tstate, frame, func, locals, code, 0, previous);
|
||||
if (initialize_locals(tstate, func_obj, frame->localsplus, args, argcount, kwnames)) {
|
||||
assert(frame->owner == FRAME_OWNED_BY_THREAD);
|
||||
clear_thread_frame(tstate, frame);
|
||||
|
|
|
@ -151,7 +151,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
|||
/* Code access macros */
|
||||
|
||||
/* The integer overflow is checked by an assertion below. */
|
||||
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame))))
|
||||
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
|
||||
#define NEXTOPARG() do { \
|
||||
_Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
|
||||
opcode = word.op.code; \
|
||||
|
@ -301,14 +301,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
|||
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
|
||||
backoff_counter_triggers(forge_backoff_counter((COUNTER)))
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
|
||||
do { \
|
||||
/* gh-115999 tracks progress on addressing this. */ \
|
||||
static_assert(0, "The specializing interpreter is not yet thread-safe"); \
|
||||
} while (0);
|
||||
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) ((void)COUNTER)
|
||||
#else
|
||||
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
|
||||
do { \
|
||||
(COUNTER) = advance_backoff_counter((COUNTER)); \
|
||||
|
@ -318,6 +310,18 @@ GETITEM(PyObject *v, Py_ssize_t i) {
|
|||
do { \
|
||||
(COUNTER) = pause_backoff_counter((COUNTER)); \
|
||||
} while (0);
|
||||
|
||||
#ifdef ENABLE_SPECIALIZATION_FT
|
||||
/* Multiple threads may execute these concurrently if thread-local bytecode is
|
||||
* disabled and they all execute the main copy of the bytecode. Specialization
|
||||
* is disabled in that case so the value is unused, but the RMW cycle should be
|
||||
* free of data races.
|
||||
*/
|
||||
#define RECORD_BRANCH_TAKEN(bitset, flag) \
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED( \
|
||||
bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
|
||||
#else
|
||||
#define RECORD_BRANCH_TAKEN(bitset, flag)
|
||||
#endif
|
||||
|
||||
#define UNBOUNDLOCAL_ERROR_MSG \
|
||||
|
|
Python/executor_cases.c.h (generated, 23 lines changed)
|
@ -41,6 +41,8 @@
|
|||
|
||||
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
|
||||
|
||||
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
|
||||
|
||||
case _RESUME_CHECK: {
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
if (_Py_emscripten_signal_clock == 0) {
|
||||
|
@ -56,6 +58,13 @@
|
|||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -4480,8 +4489,8 @@
|
|||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
|
@ -5683,7 +5692,9 @@
|
|||
PyObject *exit_p = (PyObject *)CURRENT_OPERAND();
|
||||
_PyExitData *exit = (_PyExitData *)exit_p;
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
#if defined(Py_DEBUG) && !defined(_Py_JIT)
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
|
||||
if (lltrace >= 2) {
|
||||
|
@ -5692,7 +5703,7 @@
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(code)),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
|
@ -5878,7 +5889,7 @@
|
|||
_PyUOpPrint(&next_uop[-1]);
|
||||
printf(", exit %u, temp %d, target %d -> %s]\n",
|
||||
exit - current_executor->exits, exit->temperature.value_and_backoff,
|
||||
(int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))),
|
||||
(int)(target - _PyFrame_GetBytecode(frame)),
|
||||
_PyOpcode_OpName[target->op.code]);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
}
|
||||
|
@ -5956,9 +5967,11 @@
|
|||
case _ERROR_POP_N: {
|
||||
oparg = CURRENT_OPARG();
|
||||
uint32_t target = (uint32_t)CURRENT_OPERAND();
|
||||
frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target;
|
||||
stack_pointer += -oparg;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + target;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
GOTO_UNWIND();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -63,7 +63,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
|
|||
// This may be a newly-created generator or coroutine frame. Since it's
|
||||
// dead anyways, just pretend that the first RESUME ran:
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1;
|
||||
frame->instr_ptr =
|
||||
_PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1;
|
||||
}
|
||||
assert(!_PyFrame_IsIncomplete(frame));
|
||||
assert(f->f_back == NULL);
|
||||
|
|
|
@ -1953,16 +1953,22 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|||
}
|
||||
|
||||
void
|
||||
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
||||
_PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
|
||||
gcvisitobjects_t callback, void *arg)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
struct custom_visitor_args wrapper = {
|
||||
.callback = callback,
|
||||
.arg = arg,
|
||||
};
|
||||
|
||||
_PyEval_StopTheWorld(interp);
|
||||
gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base);
|
||||
}
|
||||
|
||||
void
|
||||
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyEval_StopTheWorld(interp);
|
||||
_PyGC_VisitObjectsWorldStopped(interp, callback, arg);
|
||||
_PyEval_StartTheWorld(interp);
|
||||
}
|
||||
|
||||
|
|
Python/generated_cases.c.h (generated, 100 lines changed)
|
@ -25,7 +25,7 @@
|
|||
lhs = stack_pointer[-2];
|
||||
uint16_t counter = read_u16(&this_instr[1].cache);
|
||||
(void)counter;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
|
@ -35,7 +35,7 @@
|
|||
}
|
||||
OPCODE_DEFERRED_INC(BINARY_OP);
|
||||
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
assert(NB_ADD <= oparg);
|
||||
assert(oparg <= NB_INPLACE_XOR);
|
||||
}
|
||||
|
@ -435,8 +435,8 @@
|
|||
container = stack_pointer[-2];
|
||||
uint16_t counter = read_u16(&this_instr[1].cache);
|
||||
(void)counter;
|
||||
assert(frame->stackpointer == NULL);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
assert(frame->stackpointer == NULL);
|
||||
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
|
||||
next_instr = this_instr;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
|
@ -1066,8 +1066,8 @@
|
|||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
|
||||
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
|
||||
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
|
||||
/* Push self onto stack of shim */
|
||||
shim->localsplus[0] = PyStackRef_DUP(self[0]);
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
|
@ -4711,7 +4711,9 @@
|
|||
int original_opcode = 0;
|
||||
if (tstate->tracing) {
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode;
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
next_instr = this_instr;
|
||||
} else {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
|
@ -4759,9 +4761,7 @@
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
|
@ -4782,9 +4782,7 @@
|
|||
PyStackRef_CLOSE(value_stackref);
|
||||
offset = 0;
|
||||
}
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
|
@ -4822,9 +4820,7 @@
|
|||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
int offset = flag * oparg;
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
|
||||
DISPATCH();
|
||||
}
|
||||
|
@ -4834,6 +4830,28 @@
|
|||
(void)this_instr;
|
||||
next_instr += 1;
|
||||
INSTRUCTION_STATS(INSTRUMENTED_RESUME);
|
||||
// _LOAD_BYTECODE
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
if (bytecode == NULL) goto error;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// _MAYBE_INSTRUMENT
|
||||
{
|
||||
if (tstate->tracing == 0) {
|
||||
|
@ -6646,9 +6664,7 @@
|
|||
cond = stack_pointer[-1];
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
|
@ -6680,9 +6696,7 @@
|
|||
cond = b;
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
stack_pointer += -1;
|
||||
|
@ -6715,9 +6729,7 @@
|
|||
cond = b;
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_False);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
}
|
||||
stack_pointer += -1;
|
||||
|
@ -6735,9 +6747,7 @@
|
|||
cond = stack_pointer[-1];
|
||||
assert(PyStackRef_BoolCheck(cond));
|
||||
int flag = PyStackRef_Is(cond, PyStackRef_True);
|
||||
#if ENABLE_SPECIALIZATION
|
||||
this_instr[1].cache = (this_instr[1].cache << 1) | flag;
|
||||
#endif
|
||||
RECORD_BRANCH_TAKEN(this_instr[1].cache, flag);
|
||||
JUMPBY(oparg * flag);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
|
@ -6832,7 +6842,11 @@
|
|||
if (oparg) {
|
||||
PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]);
|
||||
if (PyLong_Check(lasti)) {
|
||||
frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti);
|
||||
stack_pointer += -1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
assert(!_PyErr_Occurred(tstate));
|
||||
}
|
||||
else {
|
||||
|
@ -6844,6 +6858,8 @@
|
|||
Py_DECREF(exc);
|
||||
goto error;
|
||||
}
|
||||
stack_pointer += 1;
|
||||
assert(WITHIN_STACK_BOUNDS());
|
||||
}
|
||||
assert(exc && PyExceptionInstance_Check(exc));
|
||||
stack_pointer += -1;
|
||||
|
@ -6871,6 +6887,28 @@
|
|||
PREDICTED(RESUME);
|
||||
_Py_CODEUNIT* const this_instr = next_instr - 1;
|
||||
(void)this_instr;
|
||||
// _LOAD_BYTECODE
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index) {
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
_Py_CODEUNIT *bytecode =
|
||||
_PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame));
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
if (bytecode == NULL) goto error;
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer);
|
||||
int off = this_instr - _PyFrame_GetBytecode(frame);
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
|
||||
frame->instr_ptr = bytecode + off;
|
||||
// Make sure this_instr gets reset correctly for any uops that
|
||||
// follow
|
||||
next_instr = frame->instr_ptr;
|
||||
DISPATCH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// _MAYBE_INSTRUMENT
|
||||
{
|
||||
if (tstate->tracing == 0) {
|
||||
|
@ -6890,11 +6928,11 @@
|
|||
}
|
||||
// _QUICKEN_RESUME
|
||||
{
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
if (tstate->tracing == 0 && this_instr->op.code == RESUME) {
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
// _CHECK_PERIODIC_IF_NOT_YIELD_FROM
|
||||
{
|
||||
|
@ -6925,6 +6963,10 @@
|
|||
uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
|
||||
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
|
||||
DEOPT_IF(eval_breaker != version, RESUME);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
DEOPT_IF(frame->tlbc_index !=
|
||||
((_PyThreadStateImpl *)tstate)->tlbc_index, RESUME);
|
||||
#endif
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
|
|
Python/index_pool.c (new file, 193 lines)
|
@ -0,0 +1,193 @@
|
|||
#include <stdbool.h>
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_index_pool.h"
|
||||
#include "pycore_lock.h"
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
static inline void
|
||||
swap(int32_t *values, Py_ssize_t i, Py_ssize_t j)
|
||||
{
|
||||
int32_t tmp = values[i];
|
||||
values[i] = values[j];
|
||||
values[j] = tmp;
|
||||
}
|
||||
|
||||
static bool
|
||||
heap_try_swap(_PyIndexHeap *heap, Py_ssize_t i, Py_ssize_t j)
|
||||
{
|
||||
if (i < 0 || i >= heap->size) {
|
||||
return 0;
|
||||
}
|
||||
if (j < 0 || j >= heap->size) {
|
||||
return 0;
|
||||
}
|
||||
if (i <= j) {
|
||||
if (heap->values[i] <= heap->values[j]) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (heap->values[j] <= heap->values[i]) {
|
||||
return 0;
|
||||
}
|
||||
swap(heap->values, i, j);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
parent(Py_ssize_t i)
|
||||
{
|
||||
return (i - 1) / 2;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
left_child(Py_ssize_t i)
|
||||
{
|
||||
return 2 * i + 1;
|
||||
}
|
||||
|
||||
static inline Py_ssize_t
|
||||
right_child(Py_ssize_t i)
|
||||
{
|
||||
return 2 * i + 2;
|
||||
}
|
||||
|
||||
static void
|
||||
heap_add(_PyIndexHeap *heap, int32_t val)
|
||||
{
|
||||
assert(heap->size < heap->capacity);
|
||||
// Add val to end
|
||||
heap->values[heap->size] = val;
|
||||
heap->size++;
|
||||
// Sift up
|
||||
for (Py_ssize_t cur = heap->size - 1; cur > 0; cur = parent(cur)) {
|
||||
if (!heap_try_swap(heap, cur, parent(cur))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
heap_min_child(_PyIndexHeap *heap, Py_ssize_t i)
|
||||
{
|
||||
if (left_child(i) < heap->size) {
|
||||
if (right_child(i) < heap->size) {
|
||||
Py_ssize_t lval = heap->values[left_child(i)];
|
||||
Py_ssize_t rval = heap->values[right_child(i)];
|
||||
return lval < rval ? left_child(i) : right_child(i);
|
||||
}
|
||||
return left_child(i);
|
||||
}
|
||||
else if (right_child(i) < heap->size) {
|
||||
return right_child(i);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
heap_pop(_PyIndexHeap *heap)
|
||||
{
|
||||
assert(heap->size > 0);
|
||||
// Pop smallest and replace with the last element
|
||||
int32_t result = heap->values[0];
|
||||
heap->values[0] = heap->values[heap->size - 1];
|
||||
heap->size--;
|
||||
// Sift down
|
||||
for (Py_ssize_t cur = 0; cur < heap->size;) {
|
||||
Py_ssize_t min_child = heap_min_child(heap, cur);
|
||||
if (min_child > -1 && heap_try_swap(heap, cur, min_child)) {
|
||||
cur = min_child;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
heap_ensure_capacity(_PyIndexHeap *heap, Py_ssize_t limit)
|
||||
{
|
||||
assert(limit > 0);
|
||||
if (heap->capacity > limit) {
|
||||
return 0;
|
||||
}
|
||||
Py_ssize_t new_capacity = heap->capacity ? heap->capacity : 1024;
|
||||
while (new_capacity && new_capacity < limit) {
|
||||
new_capacity <<= 1;
|
||||
}
|
||||
if (!new_capacity) {
|
||||
return -1;
|
||||
}
|
||||
int32_t *new_values = PyMem_RawCalloc(new_capacity, sizeof(int32_t));
|
||||
if (new_values == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (heap->values != NULL) {
|
||||
memcpy(new_values, heap->values, heap->capacity);
|
||||
PyMem_RawFree(heap->values);
|
||||
}
|
||||
heap->values = new_values;
|
||||
heap->capacity = new_capacity;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
heap_fini(_PyIndexHeap *heap)
|
||||
{
|
||||
if (heap->values != NULL) {
|
||||
PyMem_RawFree(heap->values);
|
||||
heap->values = NULL;
|
||||
}
|
||||
heap->size = -1;
|
||||
heap->capacity = -1;
|
||||
}
|
||||
|
||||
#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH)
|
||||
#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
|
||||
|
||||
int32_t
|
||||
_PyIndexPool_AllocIndex(_PyIndexPool *pool)
|
||||
{
|
||||
LOCK_POOL(pool);
|
||||
int32_t index;
|
||||
_PyIndexHeap *free_indices = &pool->free_indices;
|
||||
if (free_indices->size == 0) {
|
||||
// No free indices. Make sure the heap can always store all of the
|
||||
// indices that have been allocated to avoid having to allocate memory
|
||||
// (which can fail) when freeing an index. Freeing indices happens when
|
||||
// threads are being destroyed, which makes error handling awkward /
|
||||
// impossible. This arrangement shifts handling of allocation failures
|
||||
// to when indices are allocated, which happens at thread creation,
|
||||
// where we are better equipped to deal with failure.
|
||||
if (heap_ensure_capacity(free_indices, pool->next_index + 1) < 0) {
|
||||
UNLOCK_POOL(pool);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
index = pool->next_index++;
|
||||
}
|
||||
else {
|
||||
index = heap_pop(free_indices);
|
||||
}
|
||||
UNLOCK_POOL(pool);
|
||||
return index;
|
||||
}
|
||||
|
||||
void
|
||||
_PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index)
|
||||
{
|
||||
LOCK_POOL(pool);
|
||||
heap_add(&pool->free_indices, index);
|
||||
UNLOCK_POOL(pool);
|
||||
}
|
||||
|
||||
void
|
||||
_PyIndexPool_Fini(_PyIndexPool *pool)
|
||||
{
|
||||
heap_fini(&pool->free_indices);
|
||||
}
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
|
@ -134,6 +134,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = {
|
|||
SPEC(dump_refs_file, WSTR_OPT, READ_ONLY, NO_SYS),
|
||||
#ifdef Py_GIL_DISABLED
|
||||
SPEC(enable_gil, INT, READ_ONLY, NO_SYS),
|
||||
SPEC(tlbc_enabled, INT, READ_ONLY, NO_SYS),
|
||||
#endif
|
||||
SPEC(faulthandler, BOOL, READ_ONLY, NO_SYS),
|
||||
SPEC(filesystem_encoding, WSTR, READ_ONLY, NO_SYS),
|
||||
|
@ -315,8 +316,13 @@ The following implementation-specific options are available:\n\
|
|||
"\
|
||||
-X showrefcount: output the total reference count and number of used\n\
|
||||
memory blocks when the program finishes or after each statement in\n\
|
||||
the interactive interpreter; only works on debug builds\n\
|
||||
-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n\
|
||||
the interactive interpreter; only works on debug builds\n"
|
||||
#ifdef Py_GIL_DISABLED
|
||||
"-X tlbc=[0|1]: enable (1) or disable (0) thread-local bytecode. Also\n\
|
||||
PYTHON_TLBC\n"
|
||||
#endif
|
||||
"\
|
||||
-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n \
|
||||
of N frames (default: 1); also PYTHONTRACEMALLOC=N\n\
|
||||
-X utf8[=0|1]: enable (1) or disable (0) UTF-8 mode; also PYTHONUTF8\n\
|
||||
-X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None';\n\
|
||||
|
@ -400,6 +406,9 @@ static const char usage_envvars[] =
|
|||
#ifdef Py_STATS
|
||||
"PYTHONSTATS : turns on statistics gathering (-X pystats)\n"
|
||||
#endif
|
||||
#ifdef Py_GIL_DISABLED
|
||||
"PYTHON_TLBC : when set to 0, disables thread-local bytecode (-X tlbc)\n"
|
||||
#endif
|
||||
"PYTHONTRACEMALLOC: trace Python memory allocations (-X tracemalloc)\n"
|
||||
"PYTHONUNBUFFERED: disable stdout/stderr buffering (-u)\n"
|
||||
"PYTHONUTF8 : control the UTF-8 mode (-X utf8)\n"
|
||||
|
@ -979,6 +988,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
|
|||
config->cpu_count = -1;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
config->enable_gil = _PyConfig_GIL_DEFAULT;
|
||||
config->tlbc_enabled = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1862,6 +1872,36 @@ error:
|
|||
"n must be greater than 0");
|
||||
}
|
||||
|
||||
static PyStatus
|
||||
config_init_tlbc(PyConfig *config)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
const char *env = config_get_env(config, "PYTHON_TLBC");
|
||||
if (env) {
|
||||
int enabled;
|
||||
if (_Py_str_to_int(env, &enabled) < 0 || (enabled < 0) || (enabled > 1)) {
|
||||
return _PyStatus_ERR(
|
||||
"PYTHON_TLBC=N: N is missing or invalid");
|
||||
}
|
||||
config->tlbc_enabled = enabled;
|
||||
}
|
||||
|
||||
const wchar_t *xoption = config_get_xoption(config, L"tlbc");
|
||||
if (xoption) {
|
||||
int enabled;
|
||||
const wchar_t *sep = wcschr(xoption, L'=');
|
||||
if (!sep || (config_wstr_to_int(sep + 1, &enabled) < 0) || (enabled < 0) || (enabled > 1)) {
|
||||
return _PyStatus_ERR(
|
||||
"-X tlbc=n: n is missing or invalid");
|
||||
}
|
||||
config->tlbc_enabled = enabled;
|
||||
}
|
||||
return _PyStatus_OK();
|
||||
#else
|
||||
return _PyStatus_OK();
|
||||
#endif
|
||||
}
|
||||
|
||||
static PyStatus
|
||||
config_init_perf_profiling(PyConfig *config)
|
||||
{
|
||||
|
@ -2111,6 +2151,11 @@ config_read_complex_options(PyConfig *config)
|
|||
}
|
||||
#endif
|
||||
|
||||
status = config_init_tlbc(config);
|
||||
if (_PyStatus_EXCEPTION(status)) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -44,10 +44,24 @@
|
|||
|
||||
#define UNLOCK_CODE() Py_END_CRITICAL_SECTION()
|
||||
|
||||
#define MODIFY_BYTECODE(code, func, ...) \
|
||||
do { \
|
||||
PyCodeObject *co = (code); \
|
||||
for (Py_ssize_t i = 0; i < code->co_tlbc->size; i++) { \
|
||||
char *bc = co->co_tlbc->entries[i]; \
|
||||
if (bc == NULL) { \
|
||||
continue; \
|
||||
} \
|
||||
(func)((_Py_CODEUNIT *)bc, __VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define LOCK_CODE(code)
|
||||
#define UNLOCK_CODE()
|
||||
#define MODIFY_BYTECODE(code, func, ...) \
|
||||
(func)(_PyCode_CODE(code), __VA_ARGS__)
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -309,7 +323,8 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset)
|
|||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
int opcode = _PyCode_CODE(code)[offset].op.code;
|
||||
int opcode =
|
||||
FT_ATOMIC_LOAD_UINT8_RELAXED(_PyCode_CODE(code)[offset].op.code);
|
||||
assert(opcode != 0);
|
||||
assert(opcode != RESERVED);
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
|
@ -578,7 +593,9 @@ sanity_check_instrumentation(PyCodeObject *code)
|
|||
_Py_CODEUNIT
|
||||
_Py_GetBaseCodeUnit(PyCodeObject *code, int i)
|
||||
{
|
||||
_Py_CODEUNIT inst = _PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
|
||||
_Py_CODEUNIT inst = {
|
||||
.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
|
||||
int opcode = inst.op.code;
|
||||
if (opcode < MIN_INSTRUMENTED_OPCODE) {
|
||||
inst.op.code = _PyOpcode_Deopt[opcode];
|
||||
|
@ -614,21 +631,22 @@ _Py_GetBaseCodeUnit(PyCodeObject *code, int i)
|
|||
}
|
||||
|
||||
static void
|
||||
de_instrument(PyCodeObject *code, int i, int event)
|
||||
de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i,
|
||||
int event)
|
||||
{
|
||||
assert(event != PY_MONITORING_EVENT_INSTRUCTION);
|
||||
assert(event != PY_MONITORING_EVENT_LINE);
|
||||
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
assert(opcode != ENTER_EXECUTOR);
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
|
||||
opcode_ptr = &monitoring->lines[i].original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
|
||||
opcode_ptr = &monitoring->per_instruction_opcodes[i];
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
int deinstrumented = DE_INSTRUMENT[opcode];
|
||||
|
@ -644,65 +662,68 @@ de_instrument(PyCodeObject *code, int i, int event)
|
|||
}
|
||||
|
||||
static void
|
||||
de_instrument_line(PyCodeObject *code, int i)
|
||||
de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring,
|
||||
int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
int opcode = instr->op.code;
|
||||
if (opcode != INSTRUMENTED_LINE) {
|
||||
return;
|
||||
}
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
int original_opcode = lines->original_opcode;
|
||||
if (original_opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
lines->original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
|
||||
lines->original_opcode = monitoring->per_instruction_opcodes[i];
|
||||
}
|
||||
CHECK(original_opcode != 0);
|
||||
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
|
||||
instr->op.code = original_opcode;
|
||||
FT_ATOMIC_STORE_UINT8(instr->op.code, original_opcode);
|
||||
if (_PyOpcode_Caches[original_opcode]) {
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
}
|
||||
assert(instr->op.code != INSTRUMENTED_LINE);
|
||||
}
|
||||
|
||||
static void
|
||||
de_instrument_per_instruction(PyCodeObject *code, int i)
|
||||
de_instrument_per_instruction(_Py_CODEUNIT *bytecode,
|
||||
_PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode;
|
||||
opcode_ptr = &monitoring->lines[i].original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode != INSTRUMENTED_INSTRUCTION) {
|
||||
return;
|
||||
}
|
||||
int original_opcode = code->_co_monitoring->per_instruction_opcodes[i];
|
||||
int original_opcode = monitoring->per_instruction_opcodes[i];
|
||||
CHECK(original_opcode != 0);
|
||||
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
|
||||
*opcode_ptr = original_opcode;
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode);
|
||||
if (_PyOpcode_Caches[original_opcode]) {
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
}
|
||||
assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
|
||||
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
instrument(PyCodeObject *code, int i)
|
||||
instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
opcode_ptr = &lines->original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i];
|
||||
opcode_ptr = &monitoring->per_instruction_opcodes[i];
|
||||
opcode = *opcode_ptr;
|
||||
CHECK(opcode != INSTRUMENTED_INSTRUCTION && opcode != INSTRUMENTED_LINE);
|
||||
CHECK(opcode == _PyOpcode_Deopt[opcode]);
|
||||
|
@ -716,52 +737,52 @@ instrument(PyCodeObject *code, int i)
|
|||
if (_PyOpcode_Caches[deopt]) {
|
||||
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff,
|
||||
adaptive_counter_warmup().value_and_backoff);
|
||||
instr[1].counter = adaptive_counter_warmup();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
instrument_line(PyCodeObject *code, int i)
|
||||
instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
uint8_t *opcode_ptr = &_PyCode_CODE(code)[i].op.code;
|
||||
uint8_t *opcode_ptr = &bytecode[i].op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
return;
|
||||
}
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
lines->original_opcode = _PyOpcode_Deopt[opcode];
|
||||
CHECK(lines->original_opcode > 0);
|
||||
*opcode_ptr = INSTRUMENTED_LINE;
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE);
|
||||
}
|
||||
|
||||
static void
|
||||
instrument_per_instruction(PyCodeObject *code, int i)
|
||||
instrument_per_instruction(_Py_CODEUNIT *bytecode,
|
||||
_PyCoMonitoringData *monitoring, int i)
|
||||
{
|
||||
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
|
||||
_Py_CODEUNIT *instr = &bytecode[i];
|
||||
uint8_t *opcode_ptr = &instr->op.code;
|
||||
int opcode = *opcode_ptr;
|
||||
if (opcode == INSTRUMENTED_LINE) {
|
||||
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i];
|
||||
_PyCoLineInstrumentationData *lines = &monitoring->lines[i];
|
||||
opcode_ptr = &lines->original_opcode;
|
||||
opcode = *opcode_ptr;
|
||||
}
|
||||
if (opcode == INSTRUMENTED_INSTRUCTION) {
|
||||
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
|
||||
assert(monitoring->per_instruction_opcodes[i] > 0);
|
||||
return;
|
||||
}
|
||||
CHECK(opcode != 0);
|
||||
if (is_instrumented(opcode)) {
|
||||
code->_co_monitoring->per_instruction_opcodes[i] = opcode;
|
||||
monitoring->per_instruction_opcodes[i] = opcode;
|
||||
}
|
||||
else {
|
||||
assert(opcode != 0);
|
||||
assert(_PyOpcode_Deopt[opcode] != 0);
|
||||
assert(_PyOpcode_Deopt[opcode] != RESUME);
|
||||
code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
|
||||
monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode];
|
||||
}
|
||||
assert(code->_co_monitoring->per_instruction_opcodes[i] > 0);
|
||||
*opcode_ptr = INSTRUMENTED_INSTRUCTION;
|
||||
assert(monitoring->per_instruction_opcodes[i] > 0);
|
||||
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -773,19 +794,19 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools)
|
|||
assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event));
|
||||
assert(opcode_has_event(_Py_GetBaseCodeUnit(code, offset).op.code));
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
bool should_de_instrument;
|
||||
if (monitoring && monitoring->tools) {
|
||||
monitoring->tools[offset] &= ~tools;
|
||||
if (monitoring->tools[offset] == 0) {
|
||||
de_instrument(code, offset, event);
|
||||
}
|
||||
should_de_instrument = (monitoring->tools[offset] == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[event];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument(code, offset, event);
|
||||
}
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument, monitoring, offset, event);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -804,22 +825,23 @@ remove_line_tools(PyCodeObject * code, int offset, int tools)
|
|||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
assert(code->_co_monitoring);
|
||||
if (code->_co_monitoring->line_tools)
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
assert(monitoring);
|
||||
bool should_de_instrument;
|
||||
if (monitoring->line_tools)
|
||||
{
|
||||
uint8_t *toolsptr = &code->_co_monitoring->line_tools[offset];
|
||||
uint8_t *toolsptr = &monitoring->line_tools[offset];
|
||||
*toolsptr &= ~tools;
|
||||
if (*toolsptr == 0 ) {
|
||||
de_instrument_line(code, offset);
|
||||
}
|
||||
should_de_instrument = (*toolsptr == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
|
||||
uint8_t single_tool = monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument_line(code, offset);
|
||||
}
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument_line, monitoring, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -841,7 +863,7 @@ add_tools(PyCodeObject * code, int offset, int event, int tools)
|
|||
assert(_Py_popcount32(tools) == 1);
|
||||
assert(tools_is_subset_for_event(code, event, tools));
|
||||
}
|
||||
instrument(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -858,7 +880,7 @@ add_line_tools(PyCodeObject * code, int offset, int tools)
|
|||
/* Single tool */
|
||||
assert(_Py_popcount32(tools) == 1);
|
||||
}
|
||||
instrument_line(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument_line, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
|
||||
|
@ -876,7 +898,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools)
|
|||
/* Single tool */
|
||||
assert(_Py_popcount32(tools) == 1);
|
||||
}
|
||||
instrument_per_instruction(code, offset);
|
||||
MODIFY_BYTECODE(code, instrument_per_instruction, code->_co_monitoring, offset);
|
||||
}
|
||||
|
||||
|
||||
|
@ -885,21 +907,22 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools)
|
|||
{
|
||||
ASSERT_WORLD_STOPPED_OR_LOCKED(code);
|
||||
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
assert(code->_co_monitoring);
|
||||
bool should_de_instrument;
|
||||
if (code->_co_monitoring->per_instruction_tools) {
|
||||
uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset];
|
||||
*toolsptr &= ~tools;
|
||||
if (*toolsptr == 0) {
|
||||
de_instrument_per_instruction(code, offset);
|
||||
}
|
||||
should_de_instrument = (*toolsptr == 0);
|
||||
}
|
||||
else {
|
||||
/* Single tool */
|
||||
uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION];
|
||||
assert(_Py_popcount32(single_tool) <= 1);
|
||||
if (((single_tool & tools) == single_tool)) {
|
||||
de_instrument_per_instruction(code, offset);
|
||||
}
|
||||
should_de_instrument = ((single_tool & tools) == single_tool);
|
||||
}
|
||||
if (should_de_instrument) {
|
||||
MODIFY_BYTECODE(code, de_instrument_per_instruction, monitoring, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1087,7 +1110,7 @@ call_instrumentation_vector(
|
|||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
assert(args[1] == NULL);
|
||||
args[1] = (PyObject *)code;
|
||||
int offset = (int)(instr - _PyCode_CODE(code));
|
||||
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
|
||||
/* Offset visible to user should be the offset in bytes, as that is the
|
||||
* convention for APIs involving code offsets. */
|
||||
int bytes_offset = offset * (int)sizeof(_Py_CODEUNIT);
|
||||
|
@ -1173,8 +1196,7 @@ _Py_call_instrumentation_jump(
|
|||
assert(event == PY_MONITORING_EVENT_JUMP ||
|
||||
event == PY_MONITORING_EVENT_BRANCH);
|
||||
assert(frame->instr_ptr == instr);
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
int to = (int)(target - _PyCode_CODE(code));
|
||||
int to = (int)(target - _PyFrame_GetBytecode(frame));
|
||||
PyObject *to_obj = PyLong_FromLong(to * (int)sizeof(_Py_CODEUNIT));
|
||||
if (to_obj == NULL) {
|
||||
return NULL;
|
||||
|
@ -1240,7 +1262,8 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
|
|||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
assert(tstate->tracing == 0);
|
||||
assert(debug_check_sanity(tstate->interp, code));
|
||||
int i = (int)(instr - _PyCode_CODE(code));
|
||||
_Py_CODEUNIT *bytecode = _PyFrame_GetBytecode(frame);
|
||||
int i = (int)(instr - bytecode);
|
||||
|
||||
_PyCoMonitoringData *monitoring = code->_co_monitoring;
|
||||
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i];
|
||||
|
@ -1256,10 +1279,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
|
|||
line = compute_line(code, i, line_delta);
|
||||
assert(line >= 0);
|
||||
assert(prev != NULL);
|
||||
int prev_index = (int)(prev - _PyCode_CODE(code));
|
||||
int prev_index = (int)(prev - bytecode);
|
||||
int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
|
||||
if (prev_line == line) {
|
||||
int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
|
||||
int prev_opcode = bytecode[prev_index].op.code;
|
||||
/* RESUME and INSTRUMENTED_RESUME are needed for the operation of
|
||||
* instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
|
||||
*/
|
||||
|
@ -1359,7 +1382,7 @@ int
|
|||
_Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr)
|
||||
{
|
||||
PyCodeObject *code = _PyFrame_GetCode(frame);
|
||||
int offset = (int)(instr - _PyCode_CODE(code));
|
||||
int offset = (int)(instr - _PyFrame_GetBytecode(frame));
|
||||
_PyCoMonitoringData *instrumentation_data = code->_co_monitoring;
|
||||
assert(instrumentation_data->per_instruction_opcodes);
|
||||
int next_opcode = instrumentation_data->per_instruction_opcodes[offset];
|
||||
|
|
Python/optimizer_cases.c.h (generated, 2 lines changed)
|
@ -17,6 +17,8 @@
|
|||
|
||||
/* _QUICKEN_RESUME is not a viable micro-op for tier 2 */
|
||||
|
||||
/* _LOAD_BYTECODE is not a viable micro-op for tier 2 */
|
||||
|
||||
case _RESUME_CHECK: {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1513,6 +1513,11 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
|||
PyMem_RawFree(new_tstate);
|
||||
return NULL;
|
||||
}
|
||||
int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
|
||||
if (tlbc_idx < 0) {
|
||||
PyMem_RawFree(new_tstate);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We serialize concurrent creation to protect global state. */
|
||||
|
@ -1555,6 +1560,7 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
|||
#ifdef Py_GIL_DISABLED
|
||||
// Must be called with lock unlocked to avoid lock ordering deadlocks.
|
||||
_Py_qsbr_register(tstate, interp, qsbr_idx);
|
||||
tstate->tlbc_index = tlbc_idx;
|
||||
#endif
|
||||
|
||||
return (PyThreadState *)tstate;
|
||||
|
@ -1706,6 +1712,10 @@ PyThreadState_Clear(PyThreadState *tstate)
|
|||
|
||||
// Remove ourself from the biased reference counting table of threads.
|
||||
_Py_brc_remove_thread(tstate);
|
||||
|
||||
// Release our thread-local copies of the bytecode for reuse by another
|
||||
// thread
|
||||
_Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate);
|
||||
#endif
|
||||
|
||||
// Merge our queue of pointers to be freed into the interpreter queue.
|
||||
|
|
|
@ -24,6 +24,25 @@ extern const char *_PyUOpName(int index);
|
|||
* ./adaptive.md
|
||||
*/
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define SET_OPCODE_OR_RETURN(instr, opcode) \
|
||||
do { \
|
||||
uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \
|
||||
if (old_op >= MIN_INSTRUMENTED_OPCODE) { \
|
||||
/* Lost race with instrumentation */ \
|
||||
return; \
|
||||
} \
|
||||
if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \
|
||||
(opcode))) { \
|
||||
/* Lost race with instrumentation */ \
|
||||
assert(old_op >= MIN_INSTRUMENTED_OPCODE); \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode)
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
|
||||
static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats };
|
||||
|
@ -436,16 +455,25 @@ do { \
|
|||
# define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
|
||||
#endif
|
||||
|
||||
// Initialize warmup counters and insert superinstructions. This cannot fail.
|
||||
// Initialize warmup counters and optimize instructions. This cannot fail.
|
||||
void
|
||||
_PyCode_Quicken(PyCodeObject *code)
|
||||
_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts,
|
||||
int enable_counters)
|
||||
{
|
||||
#if ENABLE_SPECIALIZATION
|
||||
#if ENABLE_SPECIALIZATION_FT
|
||||
_Py_BackoffCounter jump_counter, adaptive_counter;
|
||||
if (enable_counters) {
|
||||
jump_counter = initial_jump_backoff_counter();
|
||||
adaptive_counter = adaptive_counter_warmup();
|
||||
}
|
||||
else {
|
||||
jump_counter = initial_unreachable_backoff_counter();
|
||||
adaptive_counter = initial_unreachable_backoff_counter();
|
||||
}
|
||||
int opcode = 0;
|
||||
int oparg = 0;
|
||||
_Py_CODEUNIT *instructions = _PyCode_CODE(code);
|
||||
/* The last code unit cannot have a cache, so we don't need to check it */
|
||||
for (int i = 0; i < Py_SIZE(code)-1; i++) {
|
||||
for (Py_ssize_t i = 0; i < size-1; i++) {
|
||||
opcode = instructions[i].op.code;
|
||||
int caches = _PyOpcode_Caches[opcode];
|
||||
oparg = (oparg << 8) | instructions[i].op.arg;
|
||||
|
@ -453,7 +481,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
|||
// The initial value depends on the opcode
|
||||
switch (opcode) {
|
||||
case JUMP_BACKWARD:
|
||||
instructions[i + 1].counter = initial_jump_backoff_counter();
|
||||
instructions[i + 1].counter = jump_counter;
|
||||
break;
|
||||
case POP_JUMP_IF_FALSE:
|
||||
case POP_JUMP_IF_TRUE:
|
||||
|
@ -462,7 +490,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
|||
instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits
|
||||
break;
|
||||
default:
|
||||
instructions[i + 1].counter = adaptive_counter_warmup();
|
||||
instructions[i + 1].counter = adaptive_counter;
|
||||
break;
|
||||
}
|
||||
i += caches;
|
||||
|
@ -471,7 +499,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
|||
/* We can't do this in the bytecode compiler as
|
||||
* marshalling can intern strings and make them immortal. */
|
||||
|
||||
PyObject *obj = PyTuple_GET_ITEM(code->co_consts, oparg);
|
||||
PyObject *obj = PyTuple_GET_ITEM(consts, oparg);
|
||||
if (_Py_IsImmortal(obj)) {
|
||||
instructions[i].op.code = LOAD_CONST_IMMORTAL;
|
||||
}
|
||||
|
@ -480,7 +508,7 @@ _PyCode_Quicken(PyCodeObject *code)
|
|||
oparg = 0;
|
||||
}
|
||||
}
|
||||
#endif /* ENABLE_SPECIALIZATION */
|
||||
#endif /* ENABLE_SPECIALIZATION_FT */
|
||||
}
|
||||
|
||||
#define SIMPLE_FUNCTION 0
|
||||
|
@ -2243,9 +2271,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
|||
{
|
||||
PyObject *lhs = PyStackRef_AsPyObjectBorrow(lhs_st);
|
||||
PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st);
|
||||
assert(ENABLE_SPECIALIZATION);
|
||||
assert(ENABLE_SPECIALIZATION_FT);
|
||||
assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP);
|
||||
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1);
|
||||
uint8_t specialized_op;
|
||||
switch (oparg) {
|
||||
case NB_ADD:
|
||||
case NB_INPLACE_ADD:
|
||||
|
@ -2256,18 +2285,18 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
|||
_Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1];
|
||||
bool to_store = (next.op.code == STORE_FAST);
|
||||
if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) {
|
||||
instr->op.code = BINARY_OP_INPLACE_ADD_UNICODE;
|
||||
specialized_op = BINARY_OP_INPLACE_ADD_UNICODE;
|
||||
goto success;
|
||||
}
|
||||
instr->op.code = BINARY_OP_ADD_UNICODE;
|
||||
specialized_op = BINARY_OP_ADD_UNICODE;
|
||||
goto success;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_ADD_INT;
|
||||
specialized_op = BINARY_OP_ADD_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_ADD_FLOAT;
|
||||
specialized_op = BINARY_OP_ADD_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
|
@ -2277,11 +2306,11 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
|||
break;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_MULTIPLY_INT;
|
||||
specialized_op = BINARY_OP_MULTIPLY_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_MULTIPLY_FLOAT;
|
||||
specialized_op = BINARY_OP_MULTIPLY_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
|
@ -2291,22 +2320,23 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in
|
|||
break;
|
||||
}
|
||||
if (PyLong_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_SUBTRACT_INT;
|
||||
specialized_op = BINARY_OP_SUBTRACT_INT;
|
||||
goto success;
|
||||
}
|
||||
if (PyFloat_CheckExact(lhs)) {
|
||||
instr->op.code = BINARY_OP_SUBTRACT_FLOAT;
|
||||
specialized_op = BINARY_OP_SUBTRACT_FLOAT;
|
||||
goto success;
|
||||
}
|
||||
break;
|
||||
}
|
||||
SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
|
||||
STAT_INC(BINARY_OP, failure);
|
||||
instr->op.code = BINARY_OP;
|
||||
SET_OPCODE_OR_RETURN(instr, BINARY_OP);
|
||||
cache->counter = adaptive_counter_backoff(cache->counter);
|
||||
return;
|
||||
success:
|
||||
STAT_INC(BINARY_OP, success);
|
||||
SET_OPCODE_OR_RETURN(instr, specialized_op);
|
||||
cache->counter = adaptive_counter_cooldown();
|
||||
}
|
||||
|
||||
|
|
|
@ -2174,6 +2174,11 @@ sys__clear_internal_caches_impl(PyObject *module)
|
|||
#ifdef _Py_TIER2
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_Py_Executors_InvalidateAll(interp, 0);
|
||||
#endif
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (_Py_ClearUnusedTLBC(_PyInterpreterState_GET()) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
PyType_ClearCache();
|
||||
Py_RETURN_NONE;
|
||||
|
|