gh-124218: Refactor per-thread reference counting (#124844)

Currently, we only use per-thread reference counting for heap type objects and
the naming reflects that. We will extend it to a few additional types in an
upcoming change to avoid scaling bottlenecks when creating nested functions.

Rename some of the files and functions in preparation for this change.
This commit is contained in:
Sam Gross 2024-10-01 13:05:42 -04:00 committed by GitHub
parent 5aa91c56bf
commit b482538523
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 168 additions and 167 deletions

View file

@ -15,7 +15,7 @@
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_weakref.h" // _PyWeakref_ClearRef()
#include "pydtrace.h"
#include "pycore_typeid.h" // _PyType_MergeThreadLocalRefcounts
#include "pycore_uniqueid.h" // _PyObject_MergePerThreadRefcounts
#ifdef Py_GIL_DISABLED
@ -217,12 +217,12 @@ disable_deferred_refcounting(PyObject *op)
merge_refcount(op, 0);
}
// Heap types also use thread-local refcounting -- disable it here.
// Heap types also use per-thread refcounting -- disable it here.
if (PyType_Check(op)) {
// Disable thread-local refcounting for heap types
PyTypeObject *type = (PyTypeObject *)op;
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
_PyType_ReleaseId((PyHeapTypeObject *)op);
if (PyType_HasFeature((PyTypeObject *)op, Py_TPFLAGS_HEAPTYPE)) {
PyHeapTypeObject *ht = (PyHeapTypeObject *)op;
_PyObject_ReleaseUniqueId(ht->unique_id);
ht->unique_id = -1;
}
}
@ -1221,7 +1221,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
// merge per-thread refcount for types into the type's actual refcount
_PyType_MergeThreadLocalRefcounts(tstate);
_PyObject_MergePerThreadRefcounts(tstate);
// merge refcounts for all queued objects
merge_queued_objects(tstate, state);

View file

@ -28,7 +28,7 @@
#include "pycore_sliceobject.h" // _PySlice_Fini()
#include "pycore_sysmodule.h" // _PySys_ClearAuditHooks()
#include "pycore_traceback.h" // _Py_DumpTracebackThreads()
#include "pycore_typeid.h" // _PyType_FinalizeIdPool()
#include "pycore_uniqueid.h" // _PyObject_FinalizeUniqueIdPool()
#include "pycore_typeobject.h" // _PyTypes_InitTypes()
#include "pycore_typevarobject.h" // _Py_clear_generic_types()
#include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
@ -1834,7 +1834,7 @@ finalize_interp_types(PyInterpreterState *interp)
_PyTypes_Fini(interp);
#ifdef Py_GIL_DISABLED
_PyType_FinalizeIdPool(interp);
_PyObject_FinalizeUniqueIdPool(interp);
#endif
_PyCode_Fini(interp);

View file

@ -20,7 +20,7 @@
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
#include "pycore_typeid.h" // _PyType_FinalizeThreadLocalRefcounts()
#include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts()
/* --------------------------------------------------------------------------
CAUTION
@ -1745,7 +1745,7 @@ PyThreadState_Clear(PyThreadState *tstate)
// Merge our thread-local refcounts into the type's own refcount and
// free our local refcount array.
_PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate);
_PyObject_FinalizePerThreadRefcounts((_PyThreadStateImpl *)tstate);
// Remove ourself from the biased reference counting table of threads.
_Py_brc_remove_thread(tstate);
@ -1805,7 +1805,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
tstate->interp->object_state.reftotal += tstate_impl->reftotal;
tstate_impl->reftotal = 0;
assert(tstate_impl->types.refcounts == NULL);
assert(tstate_impl->refcounts.values == NULL);
#endif
HEAD_UNLOCK(runtime);

View file

@ -3,12 +3,14 @@
#include "pycore_lock.h" // PyMutex_LockFlags()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_object.h" // _Py_IncRefTotal
#include "pycore_typeid.h"
#include "pycore_uniqueid.h"
// This contains code for allocating unique ids to heap type objects
// and re-using those ids when the type is deallocated.
// This contains code for allocating unique ids for per-thread reference
// counting and re-using those ids when an object is deallocated.
//
// See Include/internal/pycore_typeid.h for more details.
// Currently, per-thread reference counting is only used for heap types.
//
// See Include/internal/pycore_uniqueid.h for more details.
#ifdef Py_GIL_DISABLED
@ -18,7 +20,7 @@
#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
static int
resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
resize_interp_type_id_pool(struct _Py_unique_id_pool *pool)
{
if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * sizeof(*pool->table))) {
return -1;
@ -29,8 +31,8 @@ resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
new_size = POOL_MIN_SIZE;
}
_Py_type_id_entry *table = PyMem_Realloc(pool->table,
new_size * sizeof(*pool->table));
_Py_unique_id_entry *table = PyMem_Realloc(pool->table,
new_size * sizeof(*pool->table));
if (table == NULL) {
return -1;
}
@ -50,70 +52,67 @@ resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
static int
resize_local_refcounts(_PyThreadStateImpl *tstate)
{
if (tstate->types.is_finalized) {
if (tstate->refcounts.is_finalized) {
return -1;
}
struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
struct _Py_unique_id_pool *pool = &tstate->base.interp->unique_ids;
Py_ssize_t size = _Py_atomic_load_ssize(&pool->size);
Py_ssize_t *refcnts = PyMem_Realloc(tstate->types.refcounts,
Py_ssize_t *refcnts = PyMem_Realloc(tstate->refcounts.values,
size * sizeof(Py_ssize_t));
if (refcnts == NULL) {
return -1;
}
Py_ssize_t old_size = tstate->types.size;
Py_ssize_t old_size = tstate->refcounts.size;
if (old_size < size) {
memset(refcnts + old_size, 0, (size - old_size) * sizeof(Py_ssize_t));
}
tstate->types.refcounts = refcnts;
tstate->types.size = size;
tstate->refcounts.values = refcnts;
tstate->refcounts.size = size;
return 0;
}
void
_PyType_AssignId(PyHeapTypeObject *type)
Py_ssize_t
_PyObject_AssignUniqueId(PyObject *obj)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_type_id_pool *pool = &interp->type_ids;
struct _Py_unique_id_pool *pool = &interp->unique_ids;
LOCK_POOL(pool);
if (pool->freelist == NULL) {
if (resize_interp_type_id_pool(pool) < 0) {
type->unique_id = -1;
UNLOCK_POOL(pool);
return;
return -1;
}
}
_Py_type_id_entry *entry = pool->freelist;
_Py_unique_id_entry *entry = pool->freelist;
pool->freelist = entry->next;
entry->type = type;
_PyObject_SetDeferredRefcount((PyObject *)type);
type->unique_id = (entry - pool->table);
entry->obj = obj;
_PyObject_SetDeferredRefcount(obj);
Py_ssize_t unique_id = (entry - pool->table);
UNLOCK_POOL(pool);
return unique_id;
}
void
_PyType_ReleaseId(PyHeapTypeObject *type)
_PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_type_id_pool *pool = &interp->type_ids;
struct _Py_unique_id_pool *pool = &interp->unique_ids;
if (type->unique_id < 0) {
// The type doesn't have an id assigned.
if (unique_id < 0) {
// The id is not assigned
return;
}
LOCK_POOL(pool);
_Py_type_id_entry *entry = &pool->table[type->unique_id];
assert(entry->type == type);
_Py_unique_id_entry *entry = &pool->table[unique_id];
entry->next = pool->freelist;
pool->freelist = entry;
type->unique_id = -1;
UNLOCK_POOL(pool);
}
@ -127,8 +126,8 @@ _PyType_IncrefSlow(PyHeapTypeObject *type)
return;
}
assert(type->unique_id < tstate->types.size);
tstate->types.refcounts[type->unique_id]++;
assert(type->unique_id < tstate->refcounts.size);
tstate->refcounts.values[type->unique_id]++;
#ifdef Py_REF_DEBUG
_Py_IncRefTotal((PyThreadState *)tstate);
#endif
@ -136,59 +135,64 @@ _PyType_IncrefSlow(PyHeapTypeObject *type)
}
void
_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
_PyObject_MergePerThreadRefcounts(_PyThreadStateImpl *tstate)
{
if (tstate->types.refcounts == NULL) {
if (tstate->refcounts.values == NULL) {
return;
}
struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
struct _Py_unique_id_pool *pool = &tstate->base.interp->unique_ids;
LOCK_POOL(pool);
for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) {
Py_ssize_t refcnt = tstate->types.refcounts[i];
for (Py_ssize_t i = 0, n = tstate->refcounts.size; i < n; i++) {
Py_ssize_t refcnt = tstate->refcounts.values[i];
if (refcnt != 0) {
PyObject *type = (PyObject *)pool->table[i].type;
assert(PyType_Check(type));
_Py_atomic_add_ssize(&type->ob_ref_shared,
PyObject *obj = pool->table[i].obj;
_Py_atomic_add_ssize(&obj->ob_ref_shared,
refcnt << _Py_REF_SHARED_SHIFT);
tstate->types.refcounts[i] = 0;
tstate->refcounts.values[i] = 0;
}
}
UNLOCK_POOL(pool);
}
void
_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
_PyObject_FinalizePerThreadRefcounts(_PyThreadStateImpl *tstate)
{
_PyType_MergeThreadLocalRefcounts(tstate);
_PyObject_MergePerThreadRefcounts(tstate);
PyMem_Free(tstate->types.refcounts);
tstate->types.refcounts = NULL;
tstate->types.size = 0;
tstate->types.is_finalized = 1;
PyMem_Free(tstate->refcounts.values);
tstate->refcounts.values = NULL;
tstate->refcounts.size = 0;
tstate->refcounts.is_finalized = 1;
}
void
_PyType_FinalizeIdPool(PyInterpreterState *interp)
_PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp)
{
struct _Py_type_id_pool *pool = &interp->type_ids;
struct _Py_unique_id_pool *pool = &interp->unique_ids;
// First, set the free-list to NULL values
while (pool->freelist) {
_Py_type_id_entry *next = pool->freelist->next;
pool->freelist->type = NULL;
_Py_unique_id_entry *next = pool->freelist->next;
pool->freelist->obj = NULL;
pool->freelist = next;
}
// Now everything non-NULL is a type. Set the type's id to -1 in case it
// outlives the interpreter.
for (Py_ssize_t i = 0; i < pool->size; i++) {
PyHeapTypeObject *ht = pool->table[i].type;
if (ht) {
ht->unique_id = -1;
pool->table[i].type = NULL;
PyObject *obj = pool->table[i].obj;
pool->table[i].obj = NULL;
if (obj == NULL) {
continue;
}
if (PyType_Check(obj)) {
assert(PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE));
((PyHeapTypeObject *)obj)->unique_id = -1;
}
else {
Py_UNREACHABLE();
}
}
PyMem_Free(pool->table);