GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-127110)

* Mark almost all reachable objects before doing the collection phase

* Add stats for objects marked

* Visit new frames before each increment

* Update docs

* Clearer calculation of work to do.
Mark Shannon 2024-12-02 10:12:17 +00:00 committed by GitHub
parent 2a373da770
commit a8dd821d5b
14 changed files with 365 additions and 113 deletions
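The change rests on an invariant that is observable from pure Python: anything transitively reachable from the GC roots (module globals, thread stacks, interpreter state) is not garbage, so cycle detection only needs to examine the rest of the heap. A minimal demonstration using the public gc module (illustrative only; the marking machinery in the diff below is internal):

import gc
import weakref

class Node:
    def __init__(self):
        self.ref = None

# A reference cycle with no root holding it: only cycle
# detection can reclaim the pair.
a, b = Node(), Node()
a.ref, b.ref = b, a
probe = weakref.ref(a)
del a, b
gc.collect()
assert probe() is None

# An object reachable from a root (this module's globals)
# survives every collection and never needs cycle detection.
keeper = Node()
gc.collect()
assert gc.is_tracked(keeper)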


@@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
entry_frame.stackpointer = entry_frame.localsplus;
entry_frame.owner = FRAME_OWNED_BY_CSTACK;
entry_frame.visited = 0;
entry_frame.return_offset = 0;
/* Push frame */
entry_frame.previous = tstate->current_frame;


@@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g)
}
static inline int
-flip_old_space(int space)
+other_space(int space)
{
assert(space == 0 || space == 1);
return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
@@ -430,38 +430,7 @@ validate_list(PyGC_Head *head, enum flagstates flags)
#endif
#ifdef GC_EXTRA_DEBUG
static void
validate_old(GCState *gcstate)
{
for (int space = 0; space < 2; space++) {
PyGC_Head *head = &gcstate->old[space].head;
PyGC_Head *gc = GC_NEXT(head);
while (gc != head) {
PyGC_Head *next = GC_NEXT(gc);
assert(gc_old_space(gc) == space);
gc = next;
}
}
}
static void
validate_consistent_old_space(PyGC_Head *head)
{
PyGC_Head *prev = head;
PyGC_Head *gc = GC_NEXT(head);
if (gc == head) {
return;
}
int old_space = gc_old_space(gc);
while (gc != head) {
PyGC_Head *truenext = GC_NEXT(gc);
assert(truenext != NULL);
assert(gc_old_space(gc) == old_space);
prev = gc;
gc = truenext;
}
assert(prev == GC_PREV(head));
}
static void
gc_list_validate_space(PyGC_Head *head, int space) {
@@ -472,8 +441,37 @@ gc_list_validate_space(PyGC_Head *head, int space) {
}
}
static void
validate_spaces(GCState *gcstate)
{
int visited = gcstate->visited_space;
int not_visited = other_space(visited);
gc_list_validate_space(&gcstate->young.head, not_visited);
for (int space = 0; space < 2; space++) {
gc_list_validate_space(&gcstate->old[space].head, space);
}
gc_list_validate_space(&gcstate->permanent_generation.head, visited);
}
static void
validate_consistent_old_space(PyGC_Head *head)
{
PyGC_Head *gc = GC_NEXT(head);
if (gc == head) {
return;
}
int old_space = gc_old_space(gc);
while (gc != head) {
PyGC_Head *truenext = GC_NEXT(gc);
assert(truenext != NULL);
assert(gc_old_space(gc) == old_space);
gc = truenext;
}
}
#else
-#define validate_old(g) do{}while(0)
+#define validate_spaces(g) do{}while(0)
#define validate_consistent_old_space(l) do{}while(0)
#define gc_list_validate_space(l, s) do{}while(0)
#endif
@@ -494,7 +492,7 @@ update_refs(PyGC_Head *containers)
next = GC_NEXT(gc);
PyObject *op = FROM_GC(gc);
if (_Py_IsImmortal(op)) {
-gc_list_move(gc, &get_gc_state()->permanent_generation.head);
+_PyObject_GC_UNTRACK(op);
gc = next;
continue;
}
@@ -733,13 +731,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
unreachable->_gc_next &= _PyGC_PREV_MASK;
}
/* In theory, all tuples should be younger than the
* objects they refer to, as tuples are immutable.
* Therefore, untracking tuples in oldest-first order in the
* young generation before promoting them should have untracked
* all the tuples that can be untracked.
*
* Unfortunately, the C API allows tuples to be created
* and then filled in. So this won't untrack all tuples
* that can be untracked. It should untrack most of them
* and is much faster than a more complex approach that
* would untrack all relevant tuples.
*/
static void
untrack_tuples(PyGC_Head *head)
{
-PyGC_Head *next, *gc = GC_NEXT(head);
+PyGC_Head *gc = GC_NEXT(head);
while (gc != head) {
PyObject *op = FROM_GC(gc);
-next = GC_NEXT(gc);
+PyGC_Head *next = GC_NEXT(gc);
if (PyTuple_CheckExact(op)) {
_PyTuple_MaybeUntrack(op);
}
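The effect of untrack_tuples is visible from Python: tuples are created tracked, and a collection pass untracks those whose elements can never participate in a cycle. A quick check (which pass performs the untracking is an implementation detail):

import gc

t = tuple([1, 2, 3])     # built at run time, tracked on creation
print(gc.is_tracked(t))  # True
gc.collect()             # a pass over t's generation runs _PyTuple_MaybeUntrack
print(gc.is_tracked(t))  # False: every element is atomic

u = ([],)                # refers to a mutable container
gc.collect()
print(gc.is_tracked(u))  # True: must stay tracked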
@@ -1293,8 +1303,10 @@ gc_collect_young(PyThreadState *tstate,
struct gc_collection_stats *stats)
{
GCState *gcstate = &tstate->interp->gc;
validate_spaces(gcstate);
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
untrack_tuples(young);
GC_STAT_ADD(0, collections, 1);
#ifdef Py_STATS
{
@@ -1308,39 +1320,21 @@ gc_collect_young(PyThreadState *tstate,
PyGC_Head survivors;
gc_list_init(&survivors);
gc_list_set_space(young, gcstate->visited_space);
gc_collect_region(tstate, young, &survivors, stats);
Py_ssize_t survivor_count = 0;
if (gcstate->visited_space) {
/* objects in visited space have bit set, so we set it here */
survivor_count = gc_list_set_space(&survivors, 1);
}
else {
PyGC_Head *gc;
for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) {
#ifdef GC_DEBUG
assert(gc_old_space(gc) == 0);
#endif
survivor_count++;
}
}
(void)survivor_count; // Silence compiler warning
gc_list_merge(&survivors, visited);
-validate_old(gcstate);
+validate_spaces(gcstate);
gcstate->young.count = 0;
gcstate->old[gcstate->visited_space].count++;
Py_ssize_t scale_factor = gcstate->old[0].threshold;
if (scale_factor < 1) {
scale_factor = 1;
}
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
add_stats(gcstate, 0, stats);
validate_spaces(gcstate);
}
#ifndef NDEBUG
static inline int
IS_IN_VISITED(PyGC_Head *gc, int visited_space)
{
-assert(visited_space == 0 || flip_old_space(visited_space) == 0);
+assert(visited_space == 0 || other_space(visited_space) == 0);
return gc_old_space(gc) == visited_space;
}
#endif
@@ -1348,7 +1342,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space)
struct container_and_flag {
PyGC_Head *container;
int visited_space;
-uintptr_t size;
+intptr_t size;
};
/* A traversal callback for adding to container */
@@ -1371,7 +1365,7 @@ visit_add_to_container(PyObject *op, void *arg)
return 0;
}
-static uintptr_t
+static intptr_t
expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
{
struct container_and_flag arg = {
@@ -1385,6 +1379,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
* have been marked as visited */
assert(IS_IN_VISITED(gc, gcstate->visited_space));
PyObject *op = FROM_GC(gc);
assert(_PyObject_GC_IS_TRACKED(op));
if (_Py_IsImmortal(op)) {
PyGC_Head *next = GC_NEXT(gc);
gc_list_move(gc, &get_gc_state()->permanent_generation.head);
@@ -1402,22 +1397,191 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
/* Do bookkeeping for a completed GC cycle */
static void
-completed_cycle(GCState *gcstate)
+completed_scavenge(GCState *gcstate)
{
#ifdef Py_DEBUG
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
assert(gc_list_is_empty(not_visited));
#endif
gcstate->visited_space = flip_old_space(gcstate->visited_space);
/* Make sure all young objects have old space bit set correctly */
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *gc = GC_NEXT(young);
while (gc != young) {
PyGC_Head *next = GC_NEXT(gc);
gc_set_old_space(gc, gcstate->visited_space);
gc = next;
/* We must observe two invariants:
* 1. Members of the permanent generation must be marked visited.
* 2. We cannot touch members of the permanent generation. */
int visited;
if (gc_list_is_empty(&gcstate->permanent_generation.head)) {
/* Permanent generation is empty, so we can flip the space bit */
int not_visited = gcstate->visited_space;
visited = other_space(not_visited);
gcstate->visited_space = visited;
/* Make sure all objects have visited bit set correctly */
gc_list_set_space(&gcstate->young.head, not_visited);
}
else {
/* We must move the objects from visited to pending space. */
visited = gcstate->visited_space;
int not_visited = other_space(visited);
assert(gc_list_is_empty(&gcstate->old[not_visited].head));
gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head);
gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
}
assert(gc_list_is_empty(&gcstate->old[visited].head));
gcstate->work_to_do = 0;
gcstate->phase = GC_PHASE_MARK;
}
static intptr_t
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
{
if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
PyGC_Head *gc = AS_GC(op);
if (_PyObject_GC_IS_TRACKED(op) &&
gc_old_space(gc) != visited_space) {
gc_flip_old_space(gc);
gc_list_move(gc, reachable);
return 1;
}
}
return 0;
}
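move_to_reachable only touches objects that participate in the GC at all (_PyObject_IS_GC) and are currently tracked. The same distinction can be queried from Python with gc.is_tracked():

import gc

print(gc.is_tracked(0))         # False: atomic, can never be in a cycle
print(gc.is_tracked("a"))       # False: strings hold no references
print(gc.is_tracked([]))        # True: containers are tracked
print(gc.is_tracked(object()))  # False: plain object() holds no references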
static intptr_t
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
{
// Transitively traverse all objects from reachable, until empty
struct container_and_flag arg = {
.container = reachable,
.visited_space = visited_space,
.size = 0
};
while (!gc_list_is_empty(reachable)) {
PyGC_Head *gc = _PyGCHead_NEXT(reachable);
assert(gc_old_space(gc) == visited_space);
gc_list_move(gc, visited);
PyObject *op = FROM_GC(gc);
traverseproc traverse = Py_TYPE(op)->tp_traverse;
(void) traverse(op,
visit_add_to_container,
&arg);
}
gc_list_validate_space(visited, visited_space);
return arg.size;
}
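mark_all_reachable is a plain worklist traversal driven by tp_traverse. gc.get_referents() exposes the same tp_traverse edges to Python, so the computation can be sketched as follows (an illustrative analogue, not the C implementation; it keys on id() and so is only safe while the roots keep their referents alive):

import gc
import sys

def count_transitively_reachable(roots):
    """Pop an object, mark it, push everything its tp_traverse visits."""
    seen = set()
    work = [r for r in roots if gc.is_tracked(r)]
    while work:
        obj = work.pop()
        if id(obj) in seen:
            continue
        seen.add(id(obj))
        work.extend(o for o in gc.get_referents(obj) if gc.is_tracked(o))
    return len(seen)

# Most of the live heap hangs off sys.modules.
print(count_transitively_reachable([sys.modules]))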
static intptr_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
{
PyGC_Head reachable;
gc_list_init(&reachable);
Py_ssize_t objects_marked = 0;
// Move all objects on stacks to reachable
_PyRuntimeState *runtime = &_PyRuntime;
HEAD_LOCK(runtime);
PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
HEAD_UNLOCK(runtime);
while (ts) {
_PyInterpreterFrame *frame = ts->current_frame;
while (frame) {
if (frame->owner == FRAME_OWNED_BY_CSTACK) {
frame = frame->previous;
continue;
}
_PyStackRef *locals = frame->localsplus;
_PyStackRef *sp = frame->stackpointer;
objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
objects_marked += move_to_reachable(func, &reachable, visited_space);
while (sp > locals) {
sp--;
if (PyStackRef_IsNull(*sp)) {
continue;
}
PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
PyGC_Head *gc = AS_GC(op);
if (_PyObject_GC_IS_TRACKED(op) &&
gc_old_space(gc) != visited_space) {
gc_flip_old_space(gc);
objects_marked++;
gc_list_move(gc, &reachable);
}
}
}
if (!start && frame->visited) {
// If this frame has already been visited, then the lower frames
// will have already been visited and will not have changed
break;
}
frame->visited = 1;
frame = frame->previous;
}
HEAD_LOCK(runtime);
ts = PyThreadState_Next(ts);
HEAD_UNLOCK(runtime);
}
objects_marked += mark_all_reachable(&reachable, visited, visited_space);
assert(gc_list_is_empty(&reachable));
return objects_marked;
}
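The frame->visited flag is what keeps this scan cheap per increment: once the walk reaches a frame that an earlier increment already saw, every frame below it is unchanged and the walk stops. The control flow in a hedged Python analogue (Frame is a stand-in for _PyInterpreterFrame, and the order matches the C loop: the first already-visited frame is rescanned, then the walk breaks):

from dataclasses import dataclass
from typing import Optional

@dataclass
class Frame:                 # stand-in for _PyInterpreterFrame
    name: str
    previous: Optional["Frame"] = None
    visited: bool = False

def scan_new_frames(top, start):
    scanned = []
    frame = top
    while frame is not None:
        scanned.append(frame.name)   # stands in for marking the frame's stack refs
        if not start and frame.visited:
            break                    # frames below are unchanged since last increment
        frame.visited = True
        frame = frame.previous
    return scanned

base = Frame("base"); mid = Frame("mid", base); top = Frame("top", mid)
print(scan_new_frames(top, start=True))                   # ['top', 'mid', 'base']
print(scan_new_frames(Frame("newer", top), start=False))  # ['newer', 'top']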
static intptr_t
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
{
PyGC_Head reachable;
gc_list_init(&reachable);
Py_ssize_t objects_marked = 0;
objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
struct types_state *types = &interp->types;
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
}
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
}
objects_marked += mark_all_reachable(&reachable, visited, visited_space);
assert(gc_list_is_empty(&reachable));
return objects_marked;
}
static intptr_t
mark_at_start(PyThreadState *tstate)
{
// TO DO -- Make this incremental
GCState *gcstate = &tstate->interp->gc;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
gcstate->work_to_do -= objects_marked;
gcstate->phase = GC_PHASE_COLLECT;
validate_spaces(gcstate);
return objects_marked;
}
static intptr_t
assess_work_to_do(GCState *gcstate)
{
/* The amount of work we want to do depends on three things.
* 1. The number of new objects created
* 2. The growth in heap size since the last collection
* 3. The heap size (up to the number of new objects, to avoid quadratic effects)
*
* For a steady state heap, the amount of work to do is about two and a half
* times the number of new objects added to the heap: the new objects plus a
* heap-size fraction capped at 1.5x that count. This ensures that we stay ahead in the
* worst case of all new objects being garbage.
*
* This could be improved by tracking survival rates, but it is still a
* large improvement on the non-marking approach.
*/
intptr_t scale_factor = gcstate->old[0].threshold;
if (scale_factor < 2) {
scale_factor = 2;
}
intptr_t new_objects = gcstate->young.count;
intptr_t max_heap_fraction = new_objects*3/2;
intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
if (heap_fraction > max_heap_fraction) {
heap_fraction = max_heap_fraction;
}
gcstate->young.count = 0;
return new_objects + heap_fraction;
}
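Concretely, the budget is new_objects plus a heap-size fraction capped at one and a half times new_objects. A worked example with assumed numbers (SCAN_RATE_DIVISOR's real value is defined in gc.c; 5 and the other figures here are purely illustrative):

SCAN_RATE_DIVISOR = 5      # assumed for illustration
scale_factor = max(10, 2)  # gcstate->old[0].threshold, floored at 2
new_objects = 4_000        # gcstate->young.count
heap_size = 1_000_000      # gcstate->heap_size

max_heap_fraction = new_objects * 3 // 2                        # 6_000
heap_fraction = heap_size // SCAN_RATE_DIVISOR // scale_factor  # 20_000
heap_fraction = min(heap_fraction, max_heap_fraction)           # capped to 6_000
print(new_objects + heap_fraction)                              # 10_000 units of work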
static void
@@ -1425,18 +1589,30 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
{
GC_STAT_ADD(1, collections, 1);
GCState *gcstate = &tstate->interp->gc;
gcstate->work_to_do += assess_work_to_do(gcstate);
untrack_tuples(&gcstate->young.head);
if (gcstate->phase == GC_PHASE_MARK) {
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
validate_spaces(gcstate);
return;
}
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
PyGC_Head increment;
gc_list_init(&increment);
-Py_ssize_t scale_factor = gcstate->old[0].threshold;
-if (scale_factor < 1) {
-scale_factor = 1;
+int scale_factor = gcstate->old[0].threshold;
+if (scale_factor < 2) {
+scale_factor = 2;
}
intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
gc_list_merge(&gcstate->young.head, &increment);
gcstate->young.count = 0;
gc_list_validate_space(&increment, gcstate->visited_space);
-Py_ssize_t increment_size = 0;
+Py_ssize_t increment_size = gc_list_size(&increment);
while (increment_size < gcstate->work_to_do) {
if (gc_list_is_empty(not_visited)) {
break;
@@ -1444,54 +1620,56 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
gc_list_move(gc, &increment);
increment_size++;
assert(!_Py_IsImmortal(FROM_GC(gc)));
gc_set_old_space(gc, gcstate->visited_space);
increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
}
GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
validate_list(&increment, collecting_clear_unreachable_clear);
gc_list_validate_space(&increment, gcstate->visited_space);
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_validate_space(&survivors, gcstate->visited_space);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
gcstate->work_to_do -= increment_size;
validate_old(gcstate);
add_stats(gcstate, 1, stats);
if (gc_list_is_empty(not_visited)) {
-completed_cycle(gcstate);
+completed_scavenge(gcstate);
}
validate_spaces(gcstate);
}
static void
gc_collect_full(PyThreadState *tstate,
struct gc_collection_stats *stats)
{
GC_STAT_ADD(2, collections, 1);
GCState *gcstate = &tstate->interp->gc;
-validate_old(gcstate);
+validate_spaces(gcstate);
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
untrack_tuples(young);
/* merge all generations into visited */
gc_list_validate_space(young, gcstate->visited_space);
gc_list_set_space(pending, gcstate->visited_space);
gc_list_merge(young, pending);
gc_list_validate_space(pending, 1-gcstate->visited_space);
gc_list_set_space(pending, gcstate->visited_space);
gcstate->young.count = 0;
gc_list_merge(pending, visited);
validate_spaces(gcstate);
gc_collect_region(tstate, visited, visited,
stats);
validate_spaces(gcstate);
gcstate->young.count = 0;
gcstate->old[0].count = 0;
gcstate->old[1].count = 0;
gcstate->work_to_do = - gcstate->young.threshold * 2;
completed_scavenge(gcstate);
_PyGC_ClearAllFreeLists(tstate->interp);
validate_old(gcstate);
validate_spaces(gcstate);
add_stats(gcstate, 2, stats);
}
@@ -1733,20 +1911,23 @@ void
_PyGC_Freeze(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
-/* The permanent_generation has its old space bit set to zero */
-if (gcstate->visited_space) {
-gc_list_set_space(&gcstate->young.head, 0);
-}
+/* The permanent_generation must be visited */
+gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
gcstate->young.count = 0;
PyGC_Head*old0 = &gcstate->old[0].head;
PyGC_Head*old1 = &gcstate->old[1].head;
if (gcstate->visited_space) {
gc_list_set_space(old0, 1);
}
else {
gc_list_set_space(old1, 0);
}
gc_list_merge(old0, &gcstate->permanent_generation.head);
gcstate->old[0].count = 0;
gc_list_set_space(old1, 0);
gc_list_merge(old1, &gcstate->permanent_generation.head);
gcstate->old[1].count = 0;
-validate_old(gcstate);
+validate_spaces(gcstate);
}
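The permanent generation manipulated here backs the public gc.freeze() API: frozen objects are excluded from every collection until gc.unfreeze() merges them back. Usage from Python:

import gc

gc.collect()
gc.freeze()                   # move all tracked objects to the permanent generation
print(gc.get_freeze_count())  # number of objects now exempt from collection
gc.unfreeze()                 # return them to the oldest (visited) space
print(gc.get_freeze_count())  # 0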
void
@@ -1754,8 +1935,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
gc_list_merge(&gcstate->permanent_generation.head,
-&gcstate->old[0].head);
-validate_old(gcstate);
+&gcstate->old[gcstate->visited_space].head);
+validate_spaces(gcstate);
}
Py_ssize_t
@@ -1860,7 +2041,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
_Py_stats->object_stats.object_visits = 0;
}
#endif
-validate_old(gcstate);
+validate_spaces(gcstate);
_Py_atomic_store_int(&gcstate->collecting, 0);
return stats.uncollectable + stats.collected;
}


@@ -231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats)
fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
}
}
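The two new counters are part of the Py_STATS machinery, so they are only printed in a --enable-pystats build. The per-generation numbers that remain available in every build come from gc.get_stats():

import gc

gc.collect()
for gen, s in enumerate(gc.get_stats()):
    print(f"GC[{gen}] collections={s['collections']} "
          f"collected={s['collected']} uncollectable={s['uncollectable']}")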