mirror of
https://github.com/python/cpython.git
gh-115999: Enable specialization of CALL instructions in free-threaded builds (#127123)
The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below.

A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:

- Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref.
- Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__.
- Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes.
- Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure.

A few other miscellaneous changes were also needed:

- Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it.
- Add missing co_tlbc for _Py_InitCleanup.
- Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
commit dabcecfd6d
parent fc5a0dc224
11 changed files with 220 additions and 92 deletions
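The commit message above describes the key new helper, _PyType_CacheInitForSpecialization, as publishing __init__ into the per-type specialization cache only while the type version it was looked up under (via _PyType_LookupRefAndVersion) is still current. The standalone C sketch below illustrates that check-then-publish idea; the toy_type struct, field names, and atomics are simplified stand-ins, not CPython's actual implementation.

/*
 * Illustrative sketch only: simplified stand-ins for CPython internals.
 * The caller obtains __init__ together with the type version it observed,
 * and the cache is populated only if that version is still current, so a
 * racing update to __init__ cannot leave a stale entry behind.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    _Atomic uint32_t version_tag;   /* bumped whenever the type is mutated */
    _Atomic(void *)  cached_init;   /* specialization cache slot for __init__ */
} toy_type;                         /* hypothetical, not PyTypeObject */

static bool
cache_init_for_specialization(toy_type *tp, void *init, uint32_t seen_version)
{
    /* In CPython the check and the store happen under the type's lock so
     * they are atomic with respect to type modification; a bare acquire
     * load is used here only to keep the sketch self-contained. */
    if (atomic_load_explicit(&tp->version_tag, memory_order_acquire)
            != seen_version) {
        return false;               /* raced with an update; do not cache */
    }
    atomic_store_explicit(&tp->cached_init, init, memory_order_release);
    return true;
}

(The diff excerpt that follows appears to be from Python/bytecodes.c, the DSL file that defines the interpreter's instructions.)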
@@ -3329,7 +3329,7 @@ dummy_func(
         };

         specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
@@ -3337,7 +3337,7 @@ dummy_func(
             }
             OPCODE_DEFERRED_INC(CALL);
             ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-            #endif /* ENABLE_SPECIALIZATION */
+            #endif /* ENABLE_SPECIALIZATION_FT */
         }

         op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
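The two hunks above swap the #if/#endif pair guarding _SPECIALIZE_CALL from ENABLE_SPECIALIZATION to ENABLE_SPECIALIZATION_FT. Roughly speaking (the exact definitions live in CPython's internal headers and may differ), the gating has the shape below: specialization as a whole stays off under Py_GIL_DISABLED, while the _FT variant can also be on in free-threaded builds for instruction families, such as CALL, whose specializations have been made thread-safe.

/* Rough sketch of the build-time gating; not the verbatim header contents. */
#ifdef Py_GIL_DISABLED
#  define ENABLE_SPECIALIZATION     0   /* default: no specialization without the GIL */
#  define ENABLE_SPECIALIZATION_FT  1   /* families audited for thread safety opt in */
#else
#  define ENABLE_SPECIALIZATION     1
#  define ENABLE_SPECIALIZATION_FT  ENABLE_SPECIALIZATION
#endif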
@@ -3722,10 +3722,10 @@ dummy_func(
             DEOPT_IF(!PyStackRef_IsNull(null[0]));
             DEOPT_IF(!PyType_Check(callable_o));
             PyTypeObject *tp = (PyTypeObject *)callable_o;
-            DEOPT_IF(tp->tp_version_tag != type_version);
+            DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
             assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
             PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
-            PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+            PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
             PyCodeObject *code = (PyCodeObject *)init_func->func_code;
             DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
             STAT_INC(CALL, hit);
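The _CHECK_AND_ALLOCATE_OBJECT hunk above now reads the type version tag and the cached __init__ through FT_ATOMIC_LOAD_* wrappers. The sketch below shows the general shape such wrappers take (the real macros live in CPython's internal pycore headers and may differ in detail): in free-threaded builds the version guard needs a genuine atomic load, and the cached pointer needs an acquire load so it is only observed together with the version that validates it, while default builds compile both down to plain reads.

/* Sketch of the wrapper idea; illustrative definitions, not CPython's. */
#ifdef Py_GIL_DISABLED
#  define FT_ATOMIC_LOAD_UINT32_RELAXED(x)  _Py_atomic_load_uint32_relaxed(&(x))
#  define FT_ATOMIC_LOAD_PTR_ACQUIRE(x)     _Py_atomic_load_ptr_acquire(&(x))
#else
#  define FT_ATOMIC_LOAD_UINT32_RELAXED(x)  (x)   /* plain read under the GIL */
#  define FT_ATOMIC_LOAD_PTR_ACQUIRE(x)     (x)
#endif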
@@ -3743,17 +3743,19 @@ dummy_func(
             _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
                 tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
             assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
             assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
             /* Push self onto stack of shim */
             shim->localsplus[0] = PyStackRef_DUP(self[0]);
             DEAD(init);
             DEAD(self);
-            init_frame = _PyEvalFramePushAndInit(
+            _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
                 tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
             SYNC_SP();
-            if (init_frame == NULL) {
+            if (temp == NULL) {
                 _PyEval_FrameClearAndPop(tstate, shim);
                 ERROR_NO_POP();
             }
+            init_frame = temp;
             frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
             /* Account for pushing the extra frame.
              * We don't check recursion depth here,
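This is the _CREATE_INIT_FRAME bug fix called out in the commit message: the new frame is built into a local temporary and only assigned to the init_frame output once the push has succeeded, so the error path no longer leaves a partially pushed frame on the stack. A minimal standalone sketch of that publish-only-on-success shape, with hypothetical stand-in names:

#include <stddef.h>

typedef struct frame frame_t;            /* opaque stand-in for _PyInterpreterFrame */

frame_t *push_and_init_frame(void);      /* hypothetical fallible constructor */
void     pop_shim_frame(void);           /* hypothetical cleanup of the shim frame */

static int
create_init_frame(frame_t **init_frame_out)
{
    frame_t *temp = push_and_init_frame();
    if (temp == NULL) {
        pop_shim_frame();                /* clean up; the output slot was never written */
        return -1;
    }
    *init_frame_out = temp;              /* publish only after success */
    return 0;
}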
@@ -4000,8 +4002,10 @@ dummy_func(
             DEOPT_IF(callable_o != interp->callable_cache.list_append);
             assert(self_o != NULL);
             DEOPT_IF(!PyList_Check(self_o));
+            DEOPT_IF(!LOCK_OBJECT(self_o));
             STAT_INC(CALL, hit);
             int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+            UNLOCK_OBJECT(self_o);
             PyStackRef_CLOSE(self);
             PyStackRef_CLOSE(callable);
             ERROR_IF(err, error);
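The hunk above implements the "{LOCK,UNLOCK}_OBJECT in LIST_APPEND" item from the commit message. Note that the lock acquisition is wrapped in DEOPT_IF: if the list's per-object lock cannot be taken immediately, the specialized instruction deoptimizes to the generic call path rather than waiting for it. A standalone sketch of that try-lock-or-deoptimize shape, using toy types and a pthread mutex as stand-ins for CPython's per-object lock:

#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t mutex;               /* stand-in for the per-object lock */
    /* ... list payload ... */
} toy_list;

/* Non-blocking acquire: failure means "deoptimize", not "wait". */
static bool toy_lock_object(toy_list *self)
{
    return pthread_mutex_trylock(&self->mutex) == 0;
}

static void toy_unlock_object(toy_list *self)
{
    pthread_mutex_unlock(&self->mutex);
}

/* Returns 0 on success, 1 if the caller should fall back to the generic path. */
static int specialized_list_append(toy_list *self, void *item)
{
    if (!toy_lock_object(self)) {
        return 1;                        /* DEOPT_IF(!LOCK_OBJECT(self_o)) */
    }
    /* ... append `item` while holding the lock ... */
    (void)item;
    toy_unlock_object(self);
    return 0;
}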