gh-131586: Avoid refcount contention in some "special" calls (#131588)

In the free threaded build, the `_PyObject_LookupSpecial()` call can lead to
reference count contention on the returned function object because it
doesn't use stackrefs. Refactor some of the callers to use
`_PyObject_MaybeCallSpecialNoArgs`, which uses stackrefs internally.

This fixes the scaling bottleneck in the "lookup_special" microbenchmark
in `ftscalingbench.py`. However, there are still some uses of
`_PyObject_LookupSpecial()` that need to be addressed in future PRs.
This commit is contained in:
Sam Gross 2025-03-26 14:38:47 -04:00 committed by GitHub
parent 3d4ac1a2c2
commit 67fbfb42bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 450 additions and 374 deletions

View file

@ -598,6 +598,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__bytes__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__call__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__cantrace__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ceil__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__class__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__class_getitem__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__classcell__));
@ -622,6 +623,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__file__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__firstlineno__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__float__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__floor__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__floordiv__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__format__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__fspath__));
@ -727,6 +729,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasscheck__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasshook__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__truediv__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__trunc__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__type_params__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_is_unpacked_typevartuple__));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_prepare_subst__));

View file

@ -89,6 +89,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(__bytes__)
STRUCT_FOR_ID(__call__)
STRUCT_FOR_ID(__cantrace__)
STRUCT_FOR_ID(__ceil__)
STRUCT_FOR_ID(__class__)
STRUCT_FOR_ID(__class_getitem__)
STRUCT_FOR_ID(__classcell__)
@ -113,6 +114,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(__file__)
STRUCT_FOR_ID(__firstlineno__)
STRUCT_FOR_ID(__float__)
STRUCT_FOR_ID(__floor__)
STRUCT_FOR_ID(__floordiv__)
STRUCT_FOR_ID(__format__)
STRUCT_FOR_ID(__fspath__)
@ -218,6 +220,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(__subclasscheck__)
STRUCT_FOR_ID(__subclasshook__)
STRUCT_FOR_ID(__truediv__)
STRUCT_FOR_ID(__trunc__)
STRUCT_FOR_ID(__type_params__)
STRUCT_FOR_ID(__typing_is_unpacked_typevartuple__)
STRUCT_FOR_ID(__typing_prepare_subst__)

View file

@ -891,6 +891,12 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
unsigned int *);
// Internal API to look for a name through the MRO.
// This stores a stack reference in out and returns the value of
// type->tp_version or zero if name is missing. It doesn't set an exception!
extern unsigned int
_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out);
// Cache the provided init method in the specialization cache of type if the
// provided type version matches the current version of the type.
//
@ -946,6 +952,14 @@ extern int _PyObject_IsInstanceDictEmpty(PyObject *);
PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *);
PyAPI_FUNC(PyObject*) _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or_null);
// Calls the method named `attr` on `self`, but does not set an exception if
// the attribute does not exist.
// NOTE(review): the value returned when the attribute is missing is not
// visible from this declaration -- presumably NULL with no exception set, so
// callers would need PyErr_Occurred() to tell "missing" from "failed";
// confirm against the definition.
PyAPI_FUNC(PyObject *)
_PyObject_MaybeCallSpecialNoArgs(PyObject *self, PyObject *attr);
// Companion of _PyObject_MaybeCallSpecialNoArgs() that additionally takes
// `arg`.
// NOTE(review): presumably `arg` is forwarded as the single argument of the
// call and the missing-attribute behavior matches the NoArgs variant; confirm
// against the definition.
PyAPI_FUNC(PyObject *)
_PyObject_MaybeCallSpecialOneArg(PyObject *self, PyObject *attr, PyObject *arg);
extern int _PyObject_IsAbstract(PyObject *);
PyAPI_FUNC(int) _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);

View file

@ -596,6 +596,7 @@ extern "C" {
INIT_ID(__bytes__), \
INIT_ID(__call__), \
INIT_ID(__cantrace__), \
INIT_ID(__ceil__), \
INIT_ID(__class__), \
INIT_ID(__class_getitem__), \
INIT_ID(__classcell__), \
@ -620,6 +621,7 @@ extern "C" {
INIT_ID(__file__), \
INIT_ID(__firstlineno__), \
INIT_ID(__float__), \
INIT_ID(__floor__), \
INIT_ID(__floordiv__), \
INIT_ID(__format__), \
INIT_ID(__fspath__), \
@ -725,6 +727,7 @@ extern "C" {
INIT_ID(__subclasscheck__), \
INIT_ID(__subclasshook__), \
INIT_ID(__truediv__), \
INIT_ID(__trunc__), \
INIT_ID(__type_params__), \
INIT_ID(__typing_is_unpacked_typevartuple__), \
INIT_ID(__typing_prepare_subst__), \

View file

@ -592,7 +592,7 @@ PyStackRef_XCLOSE(_PyStackRef ref)
// Note: this is a macro because MSVC (Windows) has trouble inlining it.
#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT)))
#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS)))
#endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG)
@ -640,6 +640,28 @@ PyStackRef_FunctionCheck(_PyStackRef stackref)
return PyFunction_Check(PyStackRef_AsPyObjectBorrow(stackref));
}
// Registers the caller-provided `ref` with `tstate` and initializes the
// stackref it holds to PyStackRef_NULL.
//
// When Py_GIL_DISABLED is defined, `ref` is linked in as the new head of the
// thread's `c_stack_refs` list, with `ref->next` pointing at the previous
// head. Without Py_GIL_DISABLED only the initialization to PyStackRef_NULL
// happens and `tstate` is unused.
//
// Each push is expected to be paired with _PyThreadState_PopCStackRef() on
// the same `ref`.
static inline void
_PyThreadState_PushCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
{
#ifdef Py_GIL_DISABLED
// The list head lives in the implementation struct behind PyThreadState.
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
ref->next = tstate_impl->c_stack_refs;
tstate_impl->c_stack_refs = ref;
#endif
// Start out empty; the caller fills in ref->ref afterwards.
ref->ref = PyStackRef_NULL;
}
// Unregisters `ref` from `tstate` and releases the stackref it holds by
// calling PyStackRef_XCLOSE() on it.
//
// When Py_GIL_DISABLED is defined, `ref` must be the current head of the
// thread's `c_stack_refs` list (checked with an assert), i.e. entries have
// to be popped in the reverse order of their pushes. The head is then reset
// to `ref->next`. Without Py_GIL_DISABLED only the PyStackRef_XCLOSE() call
// happens.
static inline void
_PyThreadState_PopCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
{
#ifdef Py_GIL_DISABLED
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
// Pops must mirror pushes: only the most recently pushed entry may go.
assert(tstate_impl->c_stack_refs == ref);
tstate_impl->c_stack_refs = ref->next;
#endif
// NOTE(review): presumably a no-op when ref->ref is still PyStackRef_NULL
// (the "X" variant) -- confirm against PyStackRef_XCLOSE().
PyStackRef_XCLOSE(ref->ref);
}
#ifdef Py_GIL_DISABLED
static inline int
@ -656,6 +678,17 @@ _Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out)
return 0;
}
// NULL-tolerant front end for _Py_TryIncrefCompareStackRef().
//
// Reads the pointer stored at `src` with a relaxed atomic load. A non-NULL
// pointer is handed to _Py_TryIncrefCompareStackRef() together with `src` and
// `out`, and that function's result is returned unchanged. If the loaded
// pointer is NULL, `*out` is set to PyStackRef_NULL and 1 is returned.
static inline int
_Py_TryXGetStackRef(PyObject **src, _PyStackRef *out)
{
    PyObject *loaded = _Py_atomic_load_ptr_relaxed(src);
    if (loaded != NULL) {
        return _Py_TryIncrefCompareStackRef(src, loaded, out);
    }
    // Nothing stored at `src`: report success with an empty stackref.
    *out = PyStackRef_NULL;
    return 1;
}
#endif
// Like Py_VISIT but for _PyStackRef fields

View file

@ -65,6 +65,16 @@ typedef union _PyStackRef {
#endif
} _PyStackRef;
// A stackref that can be stored in a regular C local variable and be visible
// to the GC in the free threading build.
// Used in combination with _PyThreadState_PushCStackRef().
typedef struct _PyCStackRef {
// The stack reference held by this entry.
_PyStackRef ref;
#ifdef Py_GIL_DISABLED
// Link to another _PyCStackRef, forming a singly linked list; only present
// when Py_GIL_DISABLED is defined.
struct _PyCStackRef *next;
#endif
} _PyCStackRef;
#ifdef __cplusplus
}

View file

@ -47,8 +47,9 @@ typedef struct _PyThreadStateImpl {
struct _qsbr_thread_state *qsbr; // only used by free-threaded build
struct llist_node mem_free_queue; // delayed free queue
#ifdef Py_GIL_DISABLED
// Stack references for the current thread that exist on the C stack
struct _PyCStackRef *c_stack_refs;
struct _gc_thread_state gc;
struct _mimalloc_thread_state mimalloc;
struct _Py_freelists freelists;

View file

@ -144,6 +144,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__ceil__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__class__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@ -240,6 +244,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__floor__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__floordiv__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@ -660,6 +668,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__trunc__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(__type_params__);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));