gh-128807: Add marking phase for free-threaded cyclic GC (gh-128808)

This commit is contained in:
Neil Schemenauer 2025-01-15 11:27:28 -08:00 committed by GitHub
parent 8d8b854824
commit 080f444a58
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 336 additions and 22 deletions

View file

@ -17,6 +17,17 @@
#include "pydtrace.h"
#include "pycore_uniqueid.h" // _PyObject_MergeThreadLocalRefcounts()
// enable the "mark alive" pass of GC
#define GC_ENABLE_MARK_ALIVE 1
// include additional roots in "mark alive" pass
#define GC_MARK_ALIVE_EXTRA_ROOTS 1
// include Python stacks as set of known roots
#define GC_MARK_ALIVE_STACKS 1
#ifdef Py_GIL_DISABLED
typedef struct _gc_runtime_state GCState;
@ -113,28 +124,66 @@ worklist_remove(struct worklist_iter *iter)
iter->next = iter->ptr;
}
static inline int
gc_has_bit(PyObject *op, uint8_t bit)
{
return (op->ob_gc_bits & bit) != 0;
}
static inline void
gc_set_bit(PyObject *op, uint8_t bit)
{
op->ob_gc_bits |= bit;
}
static inline void
gc_clear_bit(PyObject *op, uint8_t bit)
{
op->ob_gc_bits &= ~bit;
}
static inline int
gc_is_frozen(PyObject *op)
{
return (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0;
return gc_has_bit(op, _PyGC_BITS_FROZEN);
}
static inline int
gc_is_unreachable(PyObject *op)
{
return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
return gc_has_bit(op, _PyGC_BITS_UNREACHABLE);
}
static void
static inline void
gc_set_unreachable(PyObject *op)
{
op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
gc_set_bit(op, _PyGC_BITS_UNREACHABLE);
}
static void
static inline void
gc_clear_unreachable(PyObject *op)
{
op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
gc_clear_bit(op, _PyGC_BITS_UNREACHABLE);
}
static inline int
gc_is_alive(PyObject *op)
{
return gc_has_bit(op, _PyGC_BITS_ALIVE);
}
#ifdef GC_ENABLE_MARK_ALIVE
static inline void
gc_set_alive(PyObject *op)
{
gc_set_bit(op, _PyGC_BITS_ALIVE);
}
#endif
static inline void
gc_clear_alive(PyObject *op)
{
gc_clear_bit(op, _PyGC_BITS_ALIVE);
}
// Initialize the `ob_tid` field to zero if the object is not already
@ -143,6 +192,7 @@ static void
gc_maybe_init_refs(PyObject *op)
{
if (!gc_is_unreachable(op)) {
assert(!gc_is_alive(op));
gc_set_unreachable(op);
op->ob_tid = 0;
}
@ -264,9 +314,13 @@ static void
gc_restore_refs(PyObject *op)
{
if (gc_is_unreachable(op)) {
assert(!gc_is_alive(op));
gc_restore_tid(op);
gc_clear_unreachable(op);
}
else {
gc_clear_alive(op);
}
}
// Given a mimalloc memory block return the PyObject stored in it or NULL if
@ -392,6 +446,119 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
_Py_FOR_EACH_TSTATE_END(interp);
}
// Untrack objects that can never create reference cycles.
// Return true if the object was untracked.
static bool
gc_maybe_untrack(PyObject *op)
{
// Currently we only check for tuples containing only non-GC objects. In
// theory we could check other immutable objects that contain references
// to non-GC objects.
if (PyTuple_CheckExact(op)) {
_PyTuple_MaybeUntrack(op);
if (!_PyObject_GC_IS_TRACKED(op)) {
return true;
}
}
return false;
}
#ifdef GC_ENABLE_MARK_ALIVE
static int
mark_alive_stack_push(PyObject *op, _PyObjectStack *stack)
{
if (op == NULL) {
return 0;
}
if (!_PyObject_GC_IS_TRACKED(op)) {
return 0;
}
if (gc_is_alive(op)) {
return 0; // already visited this object
}
if (gc_maybe_untrack(op)) {
return 0; // was untracked, don't visit it
}
// Need to call tp_traverse on this object. Add to stack and mark it
// alive so we don't traverse it a second time.
gc_set_alive(op);
if (_PyObjectStack_Push(stack, op) < 0) {
return -1;
}
return 0;
}
static bool
gc_clear_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
{
PyObject *op = op_from_block(block, args, false);
if (op == NULL) {
return true;
}
if (gc_is_alive(op)) {
gc_clear_alive(op);
}
return true;
}
static void
gc_abort_mark_alive(PyInterpreterState *interp,
struct collection_state *state,
_PyObjectStack *stack)
{
// We failed to allocate memory for "stack" while doing the "mark
// alive" phase. In that case, free the object stack and make sure
// that no objects have the alive bit set.
_PyObjectStack_Clear(stack);
gc_visit_heaps(interp, &gc_clear_alive_bits, &state->base);
}
#ifdef GC_MARK_ALIVE_STACKS
static int
gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref)
{
// Note: we MUST check that it is deferred before checking the rest.
// Otherwise we might read into invalid memory due to non-deferred references
// being dead already.
if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
PyObject *op = PyStackRef_AsPyObjectBorrow(stackref);
if (mark_alive_stack_push(op, stack) < 0) {
return -1;
}
}
return 0;
}
static int
gc_visit_thread_stacks_mark_alive(PyInterpreterState *interp, _PyObjectStack *stack)
{
_Py_FOR_EACH_TSTATE_BEGIN(interp, p) {
for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) {
PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable);
if (executable == NULL || !PyCode_Check(executable)) {
continue;
}
PyCodeObject *co = (PyCodeObject *)executable;
int max_stack = co->co_nlocalsplus + co->co_stacksize;
if (gc_visit_stackref_mark_alive(stack, f->f_executable) < 0) {
return -1;
}
for (int i = 0; i < max_stack; i++) {
if (gc_visit_stackref_mark_alive(stack, f->localsplus[i]) < 0) {
return -1;
}
}
}
}
_Py_FOR_EACH_TSTATE_END(interp);
return 0;
}
#endif // GC_MARK_ALIVE_STACKS
#endif // GC_ENABLE_MARK_ALIVE
static void
queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
{
@ -460,7 +627,8 @@ visit_decref(PyObject *op, void *arg)
{
if (_PyObject_GC_IS_TRACKED(op)
&& !_Py_IsImmortal(op)
&& !gc_is_frozen(op))
&& !gc_is_frozen(op)
&& !gc_is_alive(op))
{
// If update_refs hasn't reached this object yet, mark it
// as (tentatively) unreachable and initialize ob_tid to zero.
@ -482,6 +650,10 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
if (gc_is_alive(op)) {
return true;
}
// Exclude immortal objects from garbage collection
if (_Py_IsImmortal(op)) {
op->ob_tid = 0;
@ -497,14 +669,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
_PyObject_ASSERT(op, refcount >= 0);
if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
// Untrack tuples and dicts as necessary in this pass, but not objects
// with zero refcount, which we will want to collect.
if (PyTuple_CheckExact(op)) {
_PyTuple_MaybeUntrack(op);
if (!_PyObject_GC_IS_TRACKED(op)) {
gc_restore_refs(op);
return true;
}
if (gc_maybe_untrack(op)) {
gc_restore_refs(op);
return true;
}
}
@ -553,6 +720,21 @@ mark_reachable(PyObject *op)
}
#ifdef GC_DEBUG
static bool
validate_alive_bits(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
{
PyObject *op = op_from_block(block, args, false);
if (op == NULL) {
return true;
}
_PyObject_ASSERT_WITH_MSG(op, !gc_is_alive(op),
"object should not be marked as alive yet");
return true;
}
static bool
validate_refcounts(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
@ -586,6 +768,11 @@ validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
if (gc_is_alive(op)) {
_PyObject_ASSERT(op, !gc_is_unreachable(op));
return true;
}
_PyObject_ASSERT(op, gc_is_unreachable(op));
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@ -602,6 +789,10 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
if (gc_is_alive(op)) {
return true;
}
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
"refcount is too small");
@ -630,6 +821,7 @@ restore_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
}
gc_restore_tid(op);
gc_clear_unreachable(op);
gc_clear_alive(op);
return true;
}
@ -679,6 +871,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
// object is reachable, restore `ob_tid`; we're done with these objects
gc_restore_tid(op);
gc_clear_alive(op);
state->long_lived_total++;
return true;
}
@ -686,6 +879,89 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
static int
move_legacy_finalizer_reachable(struct collection_state *state);
#ifdef GC_ENABLE_MARK_ALIVE
static int
propagate_alive_bits(_PyObjectStack *stack)
{
for (;;) {
PyObject *op = _PyObjectStack_Pop(stack);
if (op == NULL) {
break;
}
assert(_PyObject_GC_IS_TRACKED(op));
assert(gc_is_alive(op));
traverseproc traverse = Py_TYPE(op)->tp_traverse;
if (traverse(op, (visitproc)&mark_alive_stack_push, stack) < 0) {
return -1;
}
}
return 0;
}
// Using tp_traverse, mark everything reachable from known root objects
// (which must be non-garbage) as alive (_PyGC_BITS_ALIVE is set). In
// most programs, this marks nearly all objects that are not actually
// unreachable.
//
// Actually alive objects can be missed in this pass if they are alive
// due to being referenced from an unknown root (e.g. an extension
// module global), some tp_traverse methods are either missing or not
// accurate, or objects that have been untracked. Objects that are only
// reachable from the aforementioned are also missed.
//
// If gc.freeze() is used, this pass is disabled since it is unlikely to
// help much. The next stages of cyclic GC will ignore objects with the
// alive bit set.
//
// Returns -1 on failure (out of memory).
static int
mark_alive_from_roots(PyInterpreterState *interp,
struct collection_state *state)
{
#ifdef GC_DEBUG
// Check that all objects don't have alive bit set
gc_visit_heaps(interp, &validate_alive_bits, &state->base);
#endif
_PyObjectStack stack = { NULL };
#define STACK_PUSH(op) \
if (mark_alive_stack_push(op, &stack) < 0) { \
gc_abort_mark_alive(interp, state, &stack); \
return -1; \
}
STACK_PUSH(interp->sysdict);
#ifdef GC_MARK_ALIVE_EXTRA_ROOTS
STACK_PUSH(interp->builtins);
STACK_PUSH(interp->dict);
struct types_state *types = &interp->types;
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
STACK_PUSH(types->builtins.initialized[i].tp_dict);
STACK_PUSH(types->builtins.initialized[i].tp_subclasses);
}
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
STACK_PUSH(types->for_extensions.initialized[i].tp_dict);
STACK_PUSH(types->for_extensions.initialized[i].tp_subclasses);
}
#endif
#ifdef GC_MARK_ALIVE_STACKS
if (gc_visit_thread_stacks_mark_alive(interp, &stack) < 0) {
gc_abort_mark_alive(interp, state, &stack);
return -1;
}
#endif
#undef STACK_PUSH
// Use tp_traverse to find everything reachable from roots.
if (propagate_alive_bits(&stack) < 0) {
gc_abort_mark_alive(interp, state, &stack);
return -1;
}
return 0;
}
#endif // GC_ENABLE_MARK_ALIVE
static int
deduce_unreachable_heap(PyInterpreterState *interp,
struct collection_state *state)
@ -1245,6 +1521,25 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
process_delayed_frees(interp, state);
#ifdef GC_ENABLE_MARK_ALIVE
// If gc.freeze() was used, it seems likely that doing this "mark alive"
// pass will not be a performance win. Typically the majority of alive
// objects will be marked as frozen and will be skipped anyhow, without
// doing this extra work. Doing this pass also defeats one of the
// purposes of using freeze: avoiding writes to objects that are frozen.
// So, we just skip this if gc.freeze() was used.
if (!state->gcstate->freeze_active) {
// Mark objects reachable from known roots as "alive". These will
// be ignored for rest of the GC pass.
int err = mark_alive_from_roots(interp, state);
if (err < 0) {
_PyEval_StartTheWorld(interp);
PyErr_NoMemory();
return;
}
}
#endif
// Find unreachable objects
int err = deduce_unreachable_heap(interp, state);
if (err < 0) {
@ -1253,6 +1548,11 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
return;
}
#ifdef GC_DEBUG
// At this point, no object should have the alive bit set
gc_visit_heaps(interp, &validate_alive_bits, &state->base);
#endif
// Print debugging information.
if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
PyObject *op;
@ -1564,6 +1864,8 @@ _PyGC_Freeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
GCState *gcstate = get_gc_state();
gcstate->freeze_active = true;
gc_visit_heaps(interp, &visit_freeze, &args);
_PyEval_StartTheWorld(interp);
}
@ -1574,7 +1876,7 @@ visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area,
{
PyObject *op = op_from_block(block, args, true);
if (op != NULL) {
op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
gc_clear_bit(op, _PyGC_BITS_FROZEN);
}
return true;
}
@ -1584,6 +1886,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
{
struct visitor_args args;
_PyEval_StopTheWorld(interp);
GCState *gcstate = get_gc_state();
gcstate->freeze_active = false;
gc_visit_heaps(interp, &visit_unfreeze, &args);
_PyEval_StartTheWorld(interp);
}