gh-123923: Defer refcounting for f_executable in _PyInterpreterFrame (#123924)

Use a `_PyStackRef` and defer the reference to `f_executable` when
possible. This avoids some reference count contention in the common case
of executing the same code object from multiple threads concurrently in
the free-threaded build.
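
For intuition, here is a minimal sketch of the deferred-reference idea, loosely modeled on `_PyStackRef`; the names, tag value, and helpers below are illustrative stand-ins, not CPython's actual definitions. A stack reference is a tagged pointer whose low bit marks a borrowed ("deferred") reference, so pushing a frame does not touch the code object's shared reference count.

/* Illustrative sketch only: a tagged pointer whose low bit marks a
 * deferred (borrowed) reference. CPython's real _PyStackRef differs. */
#include <stdint.h>

typedef union { uintptr_t bits; } StackRef;   /* hypothetical stand-in */
#define TAG_DEFERRED ((uintptr_t)1)

/* Borrow obj without touching its refcount: many threads can run the same
 * code object with no contention on its reference count. The tradeoff is
 * that the GC must treat live frames as roots and visit these references. */
static inline StackRef
stackref_deferred(void *obj)
{
    return (StackRef){ .bits = (uintptr_t)obj | TAG_DEFERRED };
}

static inline int
stackref_is_deferred(StackRef ref)
{
    return (ref.bits & TAG_DEFERRED) != 0;
}

/* Recover the borrowed pointer by clearing the tag bit. */
static inline void *
stackref_untag(StackRef ref)
{
    return (void *)(ref.bits & ~TAG_DEFERRED);
}

When a frame can outlive its thread's stack (for example, a generator caught in a GC cycle), deferred references must be upgraded to ordinary strong references; that is what `PyStackRef_AsStrongReference` and the new `frame_disable_deferred_refcounting` in the diff below take care of.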
Sam Gross 2024-09-12 12:37:06 -04:00 committed by GitHub
parent 4ed7d1d6ac
commit b2afe2aae4
18 changed files with 177 additions and 99 deletions

Python/bytecodes.c

@@ -3598,7 +3598,7 @@ dummy_func(
op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame: _PyInterpreterFrame *)) {
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyCode_CODE((PyCodeObject *)shim->f_executable)[0].op.code == EXIT_INIT_CHECK);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self);
PyFunctionObject *init_func = (PyFunctionObject *)PyStackRef_AsPyObjectSteal(init);
@@ -4798,7 +4798,7 @@ dummy_func(
#endif
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame);
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
}

Python/ceval.c

@@ -194,7 +194,7 @@ static void
lltrace_resume_frame(_PyInterpreterFrame *frame)
{
PyObject *fobj = frame->f_funcobj;
if (!PyCode_Check(frame->f_executable) ||
if (!PyStackRef_CodeCheck(frame->f_executable) ||
fobj == NULL ||
!PyFunction_Check(fobj)
) {
@@ -784,7 +784,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
entry_frame.f_globals = (PyObject*)0xaaa3;
entry_frame.f_builtins = (PyObject*)0xaaa4;
#endif
entry_frame.f_executable = Py_None;
entry_frame.f_executable = PyStackRef_None;
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry_frame.stackpointer = entry_frame.localsplus;
entry_frame.owner = FRAME_OWNED_BY_CSTACK;
@@ -1681,7 +1681,7 @@ clear_thread_frame(PyThreadState *tstate, _PyInterpreterFrame * frame)
tstate->c_recursion_remaining--;
assert(frame->frame_obj == NULL || frame->frame_obj->f_frame == frame);
_PyFrame_ClearExceptCode(frame);
Py_DECREF(frame->f_executable);
PyStackRef_CLEAR(frame->f_executable);
tstate->c_recursion_remaining++;
_PyThreadState_PopFrame(tstate, frame);
}

Python/executor_cases.c.h

@@ -4143,7 +4143,7 @@
self = stack_pointer[-2 - oparg];
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyCode_CODE((PyCodeObject *)shim->f_executable)[0].op.code == EXIT_INIT_CHECK);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self);
PyFunctionObject *init_func = (PyFunctionObject *)PyStackRef_AsPyObjectSteal(init);
@@ -5413,7 +5413,7 @@
#endif
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame);
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
}

Python/frame.c

@@ -14,7 +14,10 @@ _PyFrame_Traverse(_PyInterpreterFrame *frame, visitproc visit, void *arg)
Py_VISIT(frame->frame_obj);
Py_VISIT(frame->f_locals);
Py_VISIT(frame->f_funcobj);
Py_VISIT(_PyFrame_GetCode(frame));
int err = _PyGC_VisitStackRef(&frame->f_executable, visit, arg);
if (err) {
return err;
}
return _PyGC_VisitFrameStack(frame, visit, arg);
}
@@ -53,10 +56,10 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
assert(frame->owner != FRAME_CLEARED);
Py_ssize_t size = ((char*)frame->stackpointer) - (char *)frame;
Py_INCREF(_PyFrame_GetCode(frame));
memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size);
frame = (_PyInterpreterFrame *)f->_f_frame_data;
frame->stackpointer = (_PyStackRef *)(((char *)frame) + size);
frame->f_executable = PyStackRef_DUP(frame->f_executable);
f->f_frame = frame;
frame->owner = FRAME_OWNED_BY_FRAME_OBJECT;
if (_PyFrame_IsIncomplete(frame)) {
@@ -131,9 +134,7 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame)
PyObject *
PyUnstable_InterpreterFrame_GetCode(struct _PyInterpreterFrame *frame)
{
PyObject *code = frame->f_executable;
Py_INCREF(code);
return code;
return PyStackRef_AsPyObjectNew(frame->f_executable);
}
int

Python/gc.c

@@ -534,6 +534,13 @@ visit_decref(PyObject *op, void *parent)
return 0;
}
int
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
{
Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
return 0;
}
int
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
{

Python/gc_free_threading.c

@@ -161,39 +161,6 @@ gc_decref(PyObject *op)
op->ob_tid -= 1;
}
static void
disable_deferred_refcounting(PyObject *op)
{
if (!_PyObject_HasDeferredRefcount(op)) {
return;
}
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
if (PyType_Check(op)) {
// Disable thread-local refcounting for heap types
PyTypeObject *type = (PyTypeObject *)op;
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
_PyType_ReleaseId((PyHeapTypeObject *)op);
}
}
else if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) {
// Ensure any non-refcounted pointers in locals are converted to
// strong references. This ensures that the generator/coroutine is not
// freed before its locals.
PyGenObject *gen = (PyGenObject *)op;
struct _PyInterpreterFrame *frame = &gen->gi_iframe;
assert(frame->stackpointer != NULL);
for (_PyStackRef *ref = frame->localsplus; ref < frame->stackpointer; ref++) {
if (!PyStackRef_IsNull(*ref) && PyStackRef_IsDeferred(*ref)) {
// Convert a deferred reference to a strong reference.
*ref = PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(*ref));
}
}
}
}
static Py_ssize_t
merge_refcount(PyObject *op, Py_ssize_t extra)
{
@@ -213,6 +180,51 @@ merge_refcount(PyObject *op, Py_ssize_t extra)
return refcount;
}
static void
frame_disable_deferred_refcounting(_PyInterpreterFrame *frame)
{
// Convert locals, variables, and the executable object to strong
// references from (possibly) deferred references.
assert(frame->stackpointer != NULL);
frame->f_executable = PyStackRef_AsStrongReference(frame->f_executable);
for (_PyStackRef *ref = frame->localsplus; ref < frame->stackpointer; ref++) {
if (!PyStackRef_IsNull(*ref) && PyStackRef_IsDeferred(*ref)) {
*ref = PyStackRef_AsStrongReference(*ref);
}
}
}
static void
disable_deferred_refcounting(PyObject *op)
{
if (_PyObject_HasDeferredRefcount(op)) {
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
merge_refcount(op, 0);
}
// Heap types also use thread-local refcounting -- disable it here.
if (PyType_Check(op)) {
// Disable thread-local refcounting for heap types
PyTypeObject *type = (PyTypeObject *)op;
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
_PyType_ReleaseId((PyHeapTypeObject *)op);
}
}
// Generators and frame objects may contain deferred references to other
// objects. If the pointed-to objects are part of cyclic trash, we may
// have disabled deferred refcounting on them and need to ensure that we
// use strong references, in case the generator or frame object is
// resurrected by a finalizer.
if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) {
frame_disable_deferred_refcounting(&((PyGenObject *)op)->gi_iframe);
}
else if (PyFrame_Check(op)) {
frame_disable_deferred_refcounting(((PyFrameObject *)op)->f_frame);
}
}
static void
gc_restore_tid(PyObject *op)
{
@@ -349,16 +361,18 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
{
HEAD_LOCK(&_PyRuntime);
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
_PyInterpreterFrame *f = p->current_frame;
while (f != NULL) {
if (f->f_executable != NULL && PyCode_Check(f->f_executable)) {
PyCodeObject *co = (PyCodeObject *)f->f_executable;
int max_stack = co->co_nlocalsplus + co->co_stacksize;
for (int i = 0; i < max_stack; i++) {
gc_visit_stackref(f->localsplus[i]);
}
for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
if (executable == NULL || !PyCode_Check(executable)) {
continue;
}
PyCodeObject *co = (PyCodeObject *)executable;
int max_stack = co->co_nlocalsplus + co->co_stacksize;
gc_visit_stackref(f->f_executable);
for (int i = 0; i < max_stack; i++) {
gc_visit_stackref(f->localsplus[i]);
}
f = f->previous;
}
}
HEAD_UNLOCK(&_PyRuntime);
@@ -623,23 +637,19 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
else {
worklist_push(&state->unreachable, op);
}
return true;
}
else if (state->reason == _Py_GC_REASON_SHUTDOWN &&
_PyObject_HasDeferredRefcount(op))
{
if (state->reason == _Py_GC_REASON_SHUTDOWN) {
// Disable deferred refcounting for reachable objects as well during
// interpreter shutdown. This ensures that these objects are collected
// immediately when their last reference is removed.
disable_deferred_refcounting(op);
merge_refcount(op, 0);
state->long_lived_total++;
}
else {
// object is reachable, restore `ob_tid`; we're done with these objects
gc_restore_tid(op);
state->long_lived_total++;
}
// object is reachable, restore `ob_tid`; we're done with these objects
gc_restore_tid(op);
state->long_lived_total++;
return true;
}
@@ -951,20 +961,29 @@ visit_decref_unreachable(PyObject *op, void *data)
return 0;
}
int
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
{
// This is a bit tricky! We want to ignore deferred references when
// computing the incoming references, but otherwise treat them like
// regular references.
if (!PyStackRef_IsDeferred(*ref) ||
(visit != visit_decref && visit != visit_decref_unreachable))
{
Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
}
return 0;
}
int
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
{
_PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
/* locals and stack */
for (; ref < frame->stackpointer; ref++) {
// This is a bit tricky! We want to ignore deferred references when
// computing the incoming references, but otherwise treat them like
// regular references.
if (PyStackRef_IsDeferred(*ref) &&
(visit == visit_decref || visit == visit_decref_unreachable)) {
continue;
if (_PyGC_VisitStackRef(ref, visit, arg) < 0) {
return -1;
}
Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
}
return 0;
}

Python/generated_cases.c.h

@@ -1018,7 +1018,7 @@
{
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyCode_CODE((PyCodeObject *)shim->f_executable)[0].op.code == EXIT_INIT_CHECK);
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self);
PyFunctionObject *init_func = (PyFunctionObject *)PyStackRef_AsPyObjectSteal(init);

Python/legacy_tracing.c

@@ -143,9 +143,8 @@ _PyEval_SetOpcodeTrace(
bool enable
) {
assert(frame != NULL);
assert(PyCode_Check(frame->f_frame->f_executable));
PyCodeObject *code = (PyCodeObject *)frame->f_frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame->f_frame);
_PyMonitoringEventSet events = 0;
if (_PyMonitoring_GetLocalEvents(code, PY_MONITORING_SYS_TRACE_ID, &events) < 0) {

Python/tracemalloc.c

@@ -250,7 +250,7 @@ hashtable_compare_traceback(const void *key1, const void *key2)
static void
tracemalloc_get_frame(_PyInterpreterFrame *pyframe, frame_t *frame)
{
assert(PyCode_Check(pyframe->f_executable));
assert(PyStackRef_CodeCheck(pyframe->f_executable));
frame->filename = &_Py_STR(anon_unknown);
int lineno = PyUnstable_InterpreterFrame_GetLine(pyframe);
if (lineno < 0) {
@@ -258,7 +258,7 @@ tracemalloc_get_frame(_PyInterpreterFrame *pyframe, frame_t *frame)
}
frame->lineno = (unsigned int)lineno;
PyObject *filename = filename = ((PyCodeObject *)pyframe->f_executable)->co_filename;
PyObject *filename = filename = _PyFrame_GetCode(pyframe)->co_filename;
if (filename == NULL) {
#ifdef TRACE_DEBUG