gh-126491: Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)" (GH-127770)

Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)"

This reverts commit 023b7d2141, which introduced
a refleak.
Author: Petr Viktorin
Date:   2024-12-10 11:53:56 +01:00 (committed by GitHub)
parent ae31df354d
commit 690fe077f6
6 changed files with 248 additions and 214 deletions

--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md

@@ -199,22 +199,22 @@ unreachable:
 ```pycon
 >>> import gc
 >>>
 >>> class Link:
 ...     def __init__(self, next_link=None):
 ...         self.next_link = next_link
 ...
 >>> link_3 = Link()
 >>> link_2 = Link(link_3)
 >>> link_1 = Link(link_2)
 >>> link_3.next_link = link_1
 >>> A = link_1
 >>> del link_1, link_2, link_3
 >>>
 >>> link_4 = Link()
 >>> link_4.next_link = link_4
 >>> del link_4
 >>>
 >>> # Collect the unreachable Link object (and its .__dict__ dict).
 >>> gc.collect()
 2
@@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
 >>> # Create a reference cycle.
 >>> x = MyObj()
 >>> x.self = x
 >>>
 >>> # Initially the object is in the young generation.
 >>> gc.get_objects(generation=0)
 [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
 >>>
 >>> # After a collection of the youngest generation the object
 >>> # moves to the old generation.
 >>> gc.collect(generation=0)
@@ -515,45 +515,6 @@ increment. All objects directly referred to from those stack frames are
 added to the working set.
 
 Then the above algorithm is repeated, starting from step 2.
 
-Determining how much work to do
--------------------------------
-
-We need to do a certain amount of work to ensure that garbage is collected,
-but doing too much work slows down execution.
-
-To work out how much work we need to do, consider a heap with `L` live objects
-and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects
-at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and
-we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`.
-For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which
-`G1` garbage objects are created.
-
-The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`,
-assuming that the number of live objects remains roughly constant.
-If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1`
-For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio.
-
-In other words, to keep the garbage fraction to 1/3 or less we need to visit
-4 times as many objects as are newly created.
-
-We can do better than this though. Not all new objects will be garbage.
-Consider the heap at the end of the scavenge with `L1` live objects and `G1`
-garbage. Also, note that `T == M + I` where `M` is the number of objects marked
-as reachable and `I` is the number of objects visited in increments.
-Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`.
-
-If we choose the amount of work done such that `2*M + I == 6N` then we can do
-less work in most cases, but are still guaranteed to keep up.
-Since `I ≳ G0 + G1` (not strictly true, but close enough)
-`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up.
-
-The reason that this improves performance is that `M` is usually much larger
-than `I`. If `M == 10I`, then `T ≅ 3N`.
-
-Finally, instead of using a fixed multiple of 8, we gradually increase it as the
-heap grows. This avoids wasting work for small heaps and during startup.
-
 Optimization: reusing fields to save memory
 ===========================================
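
The derivation in the section removed above reads more easily as displayed math. Restated with the same symbols (`T` objects visited per full scavenge, `N` new objects created, `L` live objects, `G0`/`G1` garbage at the start/end of the scavenge); this is a clarifying restatement, not part of the diff:

```latex
% Symbols as in the removed section above.
T = L + G_0 + G_1, \qquad N \ge G_1 .
% Choosing T = 4N bounds the garbage fraction:
T = 4N \;\Rightarrow\; 4 G_1 \le T = L + G_0 + G_1
      \;\Rightarrow\; L + G_0 \ge 3 G_1 .
% At steady state, G_0 = G_1 = G:
L \ge 2G \;\Longleftrightarrow\; \frac{G}{L + G} \le \tfrac{1}{3} .
```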

--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py

@@ -1161,19 +1161,27 @@ class IncrementalGCTests(unittest.TestCase):
             return head
         head = make_ll(1000)
+        count = 1000
+        # There will be some objects we aren't counting,
+        # e.g. the gc stats dicts. This test checks
+        # that the counts don't grow, so we try to
+        # correct for the uncounted objects
+        # This is just an estimate.
+        CORRECTION = 20
         enabled = gc.isenabled()
         gc.enable()
         olds = []
         initial_heap_size = _testinternalcapi.get_tracked_heap_size()
-        iterations = max(20_000, initial_heap_size)
-        for i in range(iterations):
+        for i in range(20_000):
             newhead = make_ll(20)
+            count += 20
             newhead.surprise = head
             olds.append(newhead)
             if len(olds) == 20:
                 new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
-                self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations")
+                self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
                 del olds[:]
         if not enabled:
             gc.disable()
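
As a rough illustration of what the restored test asserts, the following hedged sketch reproduces the pattern outside the test suite. `len(gc.get_objects())` is used as a stand-in for the internal `_testinternalcapi.get_tracked_heap_size()`, and the `Node` class, iteration count, and bound are illustrative only:

```python
import gc

class Node:
    """Stand-in for the linked-list objects built by make_ll() in the test."""
    def __init__(self, next_node=None):
        self.next = next_node

def make_ll(depth):
    head = Node()
    for _ in range(depth):
        head = Node(head)
    return head

def check_heap_stays_bounded(iterations=2_000, limit=27_000):
    head = make_ll(1000)
    olds = []
    gc.enable()
    # Stand-in for _testinternalcapi.get_tracked_heap_size().
    initial = len(gc.get_objects())
    for i in range(iterations):
        newhead = make_ll(20)
        newhead.surprise = head   # point new garbage at long-lived data
        olds.append(newhead)
        if len(olds) == 20:
            growth = len(gc.get_objects()) - initial
            # The incremental collector must reclaim garbage fast enough
            # that the tracked heap does not keep growing.
            assert growth < limit, f"Heap growing: {growth} after {i} iterations"
            del olds[:]

check_heap_stays_bounded()
```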

--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c

@@ -7064,7 +7064,9 @@ int
 PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
 {
     PyTypeObject *tp = Py_TYPE(obj);
-    assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+    if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
+        return 0;
+    }
     if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
         PyDictValues *values = _PyObject_InlineValues(obj);
         if (values->valid) {

--- a/Objects/genobject.c
+++ b/Objects/genobject.c

@@ -882,7 +882,25 @@ PyTypeObject PyGen_Type = {
     gen_methods,                                /* tp_methods */
     gen_memberlist,                             /* tp_members */
     gen_getsetlist,                             /* tp_getset */
-    .tp_finalize = _PyGen_Finalize,
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+    0,                                          /* tp_free */
+    0,                                          /* tp_is_gc */
+    0,                                          /* tp_bases */
+    0,                                          /* tp_mro */
+    0,                                          /* tp_cache */
+    0,                                          /* tp_subclasses */
+    0,                                          /* tp_weaklist */
+    0,                                          /* tp_del */
+    0,                                          /* tp_version_tag */
+    _PyGen_Finalize,                            /* tp_finalize */
 };
 
 static PyObject *
@@ -1224,7 +1242,24 @@ PyTypeObject PyCoro_Type = {
     coro_methods,                               /* tp_methods */
     coro_memberlist,                            /* tp_members */
     coro_getsetlist,                            /* tp_getset */
-    .tp_finalize = _PyGen_Finalize,
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+    0,                                          /* tp_free */
+    0,                                          /* tp_is_gc */
+    0,                                          /* tp_bases */
+    0,                                          /* tp_mro */
+    0,                                          /* tp_cache */
+    0,                                          /* tp_subclasses */
+    0,                                          /* tp_weaklist */
+    0,                                          /* tp_del */
+    0,                                          /* tp_version_tag */
+    _PyGen_Finalize,                            /* tp_finalize */
 };
 
 static void
@@ -1429,6 +1464,7 @@ typedef struct _PyAsyncGenWrappedValue {
     (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \
      _Py_CAST(_PyAsyncGenWrappedValue*, (op)))
 
+static int
 async_gen_traverse(PyObject *self, visitproc visit, void *arg)
 {
@@ -1637,7 +1673,24 @@ PyTypeObject PyAsyncGen_Type = {
     async_gen_methods,                          /* tp_methods */
     async_gen_memberlist,                       /* tp_members */
     async_gen_getsetlist,                       /* tp_getset */
-    .tp_finalize = _PyGen_Finalize,
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+    0,                                          /* tp_free */
+    0,                                          /* tp_is_gc */
+    0,                                          /* tp_bases */
+    0,                                          /* tp_mro */
+    0,                                          /* tp_cache */
+    0,                                          /* tp_subclasses */
+    0,                                          /* tp_weaklist */
+    0,                                          /* tp_del */
+    0,                                          /* tp_version_tag */
+    _PyGen_Finalize,                            /* tp_finalize */
 };
@@ -1882,6 +1935,16 @@ PyTypeObject _PyAsyncGenASend_Type = {
     PyObject_SelfIter,                          /* tp_iter */
     async_gen_asend_iternext,                   /* tp_iternext */
     async_gen_asend_methods,                    /* tp_methods */
+    0,                                          /* tp_members */
+    0,                                          /* tp_getset */
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
     .tp_finalize = async_gen_asend_finalize,
 };

--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c

@@ -2355,16 +2355,6 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
-static int
-plain_object_traverse(PyObject *self, visitproc visit, void *arg)
-{
-    PyTypeObject *type = Py_TYPE(self);
-    assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT);
-    Py_VISIT(type);
-    return PyObject_VisitManagedDict(self, visit, arg);
-}
-
 static void
 clear_slots(PyTypeObject *type, PyObject *self)
 {
@@ -4157,9 +4147,6 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type)
         assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
         type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
         type->tp_dictoffset = -1;
-        if (type->tp_basicsize == sizeof(PyObject)) {
-            type->tp_traverse = plain_object_traverse;
-        }
     }
     type->tp_basicsize = slotoffset;

--- a/Python/gc.c
+++ b/Python/gc.c

@@ -1277,13 +1277,18 @@ gc_list_set_space(PyGC_Head *list, int space)
  * space faster than objects are added to the old space.
  *
  * Each young or incremental collection adds a number of
- * new objects (N) to the heap, and incremental collectors
- * scan I objects from the old space.
- * I > N must be true. We also want I > N * K to be true where
- * K > 2. Higher values of K mean that the old space is
+ * objects, S (for survivors) to the old space, and
+ * incremental collectors scan I objects from the old space.
+ * I > S must be true. We also want I > S * N to be true where
+ * N > 1. Higher values of N mean that the old space is
  * scanned more rapidly.
+ * The default incremental threshold of 10 translates to
+ * N == 1.4 (1 + 4/threshold)
  */
-#define SCAN_RATE_DIVISOR 5
+/* Divide by 10, so that the default incremental threshold of 10
+ * scans objects at 1% of the heap size */
+#define SCAN_RATE_DIVISOR 10
 
 static void
 add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
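
A quick arithmetic check of the restored constants (not part of the diff; the numbers below just plug in the defaults named in the comment above):

```python
# Restored constants: SCAN_RATE_DIVISOR == 10, default threshold == 10.
SCAN_RATE_DIVISOR = 10
scale_factor = 10          # gcstate->old[0].threshold at its default

heap_size = 1_000_000
# Objects scanned per increment, as computed in assess_work_to_do():
per_increment = heap_size // SCAN_RATE_DIVISOR // scale_factor
print(per_increment)       # 10_000 -> 1% of the heap, as the comment says

# "N == 1.4 (1 + 4/threshold)" for the default incremental threshold:
threshold = 10
print(1 + 4 / threshold)   # 1.4
```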
@@ -1325,78 +1330,71 @@ gc_collect_young(PyThreadState *tstate,
     validate_spaces(gcstate);
 }
 
-typedef struct work_stack {
-    PyGC_Head *top;
+#ifndef NDEBUG
+static inline int
+IS_IN_VISITED(PyGC_Head *gc, int visited_space)
+{
+    assert(visited_space == 0 || other_space(visited_space) == 0);
+    return gc_old_space(gc) == visited_space;
+}
+#endif
+
+struct container_and_flag {
+    PyGC_Head *container;
     int visited_space;
-} WorkStack;
+    intptr_t size;
+};
 
-/* Remove gc from the list it is currently in and push it to the stack */
-static inline void
-push_to_stack(PyGC_Head *gc, WorkStack *stack)
-{
-    PyGC_Head *prev = GC_PREV(gc);
-    PyGC_Head *next = GC_NEXT(gc);
-    _PyGCHead_SET_NEXT(prev, next);
-    _PyGCHead_SET_PREV(next, prev);
-    _PyGCHead_SET_PREV(gc, stack->top);
-    stack->top = gc;
-}
-
-static inline PyGC_Head *
-pop_from_stack(WorkStack *stack)
-{
-    PyGC_Head *gc = stack->top;
-    stack->top = _PyGCHead_PREV(gc);
-    return gc;
-}
-
-/* append list `from` to `stack`; `from` becomes an empty list */
-static void
-move_list_to_stack(PyGC_Head *from, WorkStack *stack)
-{
-    if (!gc_list_is_empty(from)) {
-        PyGC_Head *from_head = GC_NEXT(from);
-        PyGC_Head *from_tail = GC_PREV(from);
-        _PyGCHead_SET_PREV(from_head, stack->top);
-        stack->top = from_tail;
-        gc_list_init(from);
-    }
-}
-
-static inline void
-move_to_stack(PyObject *op, WorkStack *stack, int visited_space)
-{
-    assert(op != NULL);
-    if (_PyObject_IS_GC(op)) {
-        PyGC_Head *gc = AS_GC(op);
-        if (_PyObject_GC_IS_TRACKED(op) &&
-            gc_old_space(gc) != visited_space) {
-            assert(!_Py_IsImmortal(op));
-            gc_flip_old_space(gc);
-            push_to_stack(gc, stack);
-        }
-    }
-}
-
-static void
-move_unvisited(PyObject *op, WorkStack *stack, int visited_space)
-{
-    move_to_stack(op, stack, visited_space);
-}
-
-#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V))
-
-/* A traversal callback for adding to container */
+/* A traversal callback for adding to container) */
 static int
 visit_add_to_container(PyObject *op, void *arg)
 {
     OBJECT_STAT_INC(object_visits);
-    WorkStack *stack = (WorkStack *)arg;
-    assert(stack->visited_space == get_gc_state()->visited_space);
-    move_to_stack(op, stack, stack->visited_space);
+    struct container_and_flag *cf = (struct container_and_flag *)arg;
+    int visited = cf->visited_space;
+    assert(visited == get_gc_state()->visited_space);
+    if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+        PyGC_Head *gc = AS_GC(op);
+        if (_PyObject_GC_IS_TRACKED(op) &&
+            gc_old_space(gc) != visited) {
+            gc_flip_old_space(gc);
+            gc_list_move(gc, cf->container);
+            cf->size++;
+        }
+    }
     return 0;
 }
 
+static intptr_t
+expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
+{
+    struct container_and_flag arg = {
+        .container = container,
+        .visited_space = gcstate->visited_space,
+        .size = 0
+    };
+    assert(GC_NEXT(gc) == container);
+    while (gc != container) {
+        /* Survivors will be moved to visited space, so they should
+         * have been marked as visited */
+        assert(IS_IN_VISITED(gc, gcstate->visited_space));
+        PyObject *op = FROM_GC(gc);
+        assert(_PyObject_GC_IS_TRACKED(op));
+        if (_Py_IsImmortal(op)) {
+            PyGC_Head *next = GC_NEXT(gc);
+            gc_list_move(gc, &get_gc_state()->permanent_generation.head);
+            gc = next;
+            continue;
+        }
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        (void) traverse(op,
+                        visit_add_to_container,
+                        &arg);
+        gc = GC_NEXT(gc);
+    }
+    return arg.size;
+}
+
 /* Do bookkeeping for a completed GC cycle */
 static void
 completed_scavenge(GCState *gcstate)
@@ -1422,62 +1420,54 @@ completed_scavenge(GCState *gcstate)
         gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
     }
     assert(gc_list_is_empty(&gcstate->old[visited].head));
-    gcstate->work_to_do = 0;
     gcstate->phase = GC_PHASE_MARK;
 }
 
-static void
-frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space)
+static intptr_t
+move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
 {
-    _PyStackRef *locals = frame->localsplus;
-    _PyStackRef *sp = frame->stackpointer;
-    if (frame->f_locals != NULL) {
-        move_unvisited(frame->f_locals, stack, visited_space);
-    }
-    PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
-    move_unvisited(func, stack, visited_space);
-    while (sp > locals) {
-        sp--;
-        _PyStackRef ref = *sp;
-        if (!PyStackRef_IsNull(ref)) {
-            PyObject *op = PyStackRef_AsPyObjectBorrow(ref);
-            if (!_Py_IsImmortal(op)) {
-                move_unvisited(op, stack, visited_space);
-            }
+    if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+        PyGC_Head *gc = AS_GC(op);
+        if (_PyObject_GC_IS_TRACKED(op) &&
+            gc_old_space(gc) != visited_space) {
+            gc_flip_old_space(gc);
+            gc_list_move(gc, reachable);
+            return 1;
         }
     }
+    return 0;
 }
 
-static Py_ssize_t
-move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space)
+static intptr_t
+mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
 {
     // Transitively traverse all objects from reachable, until empty
-    Py_ssize_t objects_marked = 0;
-    while (stack->top != NULL) {
-        PyGC_Head *gc = pop_from_stack(stack);
+    struct container_and_flag arg = {
+        .container = reachable,
+        .visited_space = visited_space,
+        .size = 0
+    };
+    while (!gc_list_is_empty(reachable)) {
+        PyGC_Head *gc = _PyGCHead_NEXT(reachable);
         assert(gc_old_space(gc) == visited_space);
-        gc_list_append(gc, visited);
-        objects_marked++;
+        gc_list_move(gc, visited);
         PyObject *op = FROM_GC(gc);
-        assert(PyObject_IS_GC(op));
-        assert(_PyObject_GC_IS_TRACKED(op));
-        if (_Py_IsImmortal(op)) {
-            _PyObject_GC_UNTRACK(op);
-        }
-        else {
-            traverseproc traverse = Py_TYPE(op)->tp_traverse;
-            (void) traverse(op, visit_add_to_container, stack);
-        }
+        traverseproc traverse = Py_TYPE(op)->tp_traverse;
+        (void) traverse(op,
+                        visit_add_to_container,
+                        &arg);
     }
     gc_list_validate_space(visited, visited_space);
-    return objects_marked;
+    return arg.size;
 }
 
 static intptr_t
 mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
 {
-    WorkStack stack;
-    stack.top = NULL;
-    stack.visited_space = visited_space;
+    PyGC_Head reachable;
+    gc_list_init(&reachable);
+    Py_ssize_t objects_marked = 0;
     // Move all objects on stacks to reachable
     _PyRuntimeState *runtime = &_PyRuntime;
     HEAD_LOCK(runtime);
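
Both sides of this hunk compute the same thing: the transitive closure of the unvisited objects, flipping each one into the visited space exactly once. The removed code drives an intrusive `WorkStack` threaded through the `PyGC_Head` links; the restored code splices objects onto a `reachable` list and drains it. A hedged Python model of the shared algorithm (names hypothetical, a set standing in for the visited space):

```python
def mark_transitively(roots, visited, edges):
    """Model of mark_all_reachable / move_all_transitively_reachable."""
    # move_to_reachable / move_to_stack: queue each unvisited root once.
    work = [obj for obj in roots if obj not in visited]
    visited.update(work)               # gc_flip_old_space()
    marked = 0
    while work:                        # drain the reachable list / WorkStack
        obj = work.pop()
        marked += 1
        for ref in edges(obj):         # tp_traverse -> visit_add_to_container
            if ref not in visited:     # gc_old_space(gc) != visited_space
                visited.add(ref)
                work.append(ref)
    return marked

# 1 -> 2 -> 3 -> 1 is a cycle; 4 is unreachable from the root.
graph = {1: [2, 3], 2: [3], 3: [1], 4: []}
assert mark_transitively([1], set(), lambda o: graph[o]) == 3
```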
@@ -1490,7 +1480,27 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
             frame = frame->previous;
             continue;
         }
-        frame_move_unvisited(frame, &stack, visited_space);
+        _PyStackRef *locals = frame->localsplus;
+        _PyStackRef *sp = frame->stackpointer;
+        objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
+        PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+        objects_marked += move_to_reachable(func, &reachable, visited_space);
+        while (sp > locals) {
+            sp--;
+            if (PyStackRef_IsNull(*sp)) {
+                continue;
+            }
+            PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
+            if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+                PyGC_Head *gc = AS_GC(op);
+                if (_PyObject_GC_IS_TRACKED(op) &&
+                    gc_old_space(gc) != visited_space) {
+                    gc_flip_old_space(gc);
+                    objects_marked++;
+                    gc_list_move(gc, &reachable);
+                }
+            }
+        }
         if (!start && frame->visited) {
             // If this frame has already been visited, then the lower frames
             // will have already been visited and will not have changed
@@ -1503,31 +1513,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
         ts = PyThreadState_Next(ts);
         HEAD_UNLOCK(runtime);
     }
-    Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
-    assert(stack.top == NULL);
+    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
+    assert(gc_list_is_empty(&reachable));
     return objects_marked;
 }
 
 static intptr_t
 mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
 {
-    WorkStack stack;
-    stack.top = NULL;
-    stack.visited_space = visited_space;
-    MOVE_UNVISITED(interp->sysdict, &stack, visited_space);
-    MOVE_UNVISITED(interp->builtins, &stack, visited_space);
-    MOVE_UNVISITED(interp->dict, &stack, visited_space);
+    PyGC_Head reachable;
+    gc_list_init(&reachable);
+    Py_ssize_t objects_marked = 0;
+    objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
+    objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
+    objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
     struct types_state *types = &interp->types;
     for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
-        MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space);
-        MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space);
+        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
+        objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
     }
     for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
-        MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space);
-        MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space);
+        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
+        objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
     }
-    Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
-    assert(stack.top == NULL);
+    objects_marked += mark_all_reachable(&reachable, visited, visited_space);
+    assert(gc_list_is_empty(&reachable));
     return objects_marked;
 }
@@ -1539,35 +1549,39 @@ mark_at_start(PyThreadState *tstate)
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
     Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
     objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
+    gcstate->work_to_do -= objects_marked;
     gcstate->phase = GC_PHASE_COLLECT;
     validate_spaces(gcstate);
     return objects_marked;
 }
 
-/* See InternalDocs/garbage_collector.md for more details. */
-#define MAX_HEAP_PORTION_MULTIPLIER 5
-#define MARKING_PROGRESS_MULTIPLIER 2
-
 static intptr_t
 assess_work_to_do(GCState *gcstate)
 {
-    /* The amount of work we want to do depends on two things.
+    /* The amount of work we want to do depends on three things.
      * 1. The number of new objects created
-     * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects)
+     * 2. The growth in heap size since the last collection
+     * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
+     *
+     * For a steady state heap, the amount of work to do is three times the number
+     * of new objects added to the heap. This ensures that we stay ahead in the
+     * worst case of all new objects being garbage.
+     *
+     * This could be improved by tracking survival rates, but it is still a
+     * large improvement on the non-marking approach.
      */
     intptr_t scale_factor = gcstate->old[0].threshold;
     if (scale_factor < 2) {
        scale_factor = 2;
     }
     intptr_t new_objects = gcstate->young.count;
-    intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER;
-    intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
-    if (heap_portion > max_heap_portion) {
-        heap_portion = max_heap_portion;
+    intptr_t max_heap_fraction = new_objects*3/2;
+    intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
+    if (heap_fraction > max_heap_fraction) {
+        heap_fraction = max_heap_fraction;
     }
     gcstate->young.count = 0;
-    return new_objects + heap_portion;
+    return new_objects + heap_fraction;
 }
 
 static void
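
The two versions of `assess_work_to_do` differ only in the constants that cap the heap-scan term. A hedged side-by-side of the arithmetic (Python stand-ins for the C above; integer division mirrors the C semantics, and the sample numbers are illustrative):

```python
def work_removed(new_objects, heap_size, scale_factor=10):
    # GH-127519 version: divisor 5, capped at 5x the new objects.
    MAX_HEAP_PORTION_MULTIPLIER = 5
    SCAN_RATE_DIVISOR = 5
    portion = min(heap_size // SCAN_RATE_DIVISOR // scale_factor,
                  new_objects * MAX_HEAP_PORTION_MULTIPLIER)
    return new_objects + portion

def work_restored(new_objects, heap_size, scale_factor=10):
    # Restored version: divisor 10, capped at 1.5x the new objects.
    SCAN_RATE_DIVISOR = 10
    fraction = min(heap_size // SCAN_RATE_DIVISOR // scale_factor,
                   new_objects * 3 // 2)
    return new_objects + fraction

# With a million-object heap and 2000 new objects per increment:
print(work_removed(2000, 1_000_000))   # 2000 + min(20000, 10000) = 12000
print(work_restored(2000, 1_000_000))  # 2000 + min(10000, 3000)  = 5000
```

Under these assumptions the reverted code asked for more scanning per increment, which is consistent with the tighter heap bound it tried to enforce in the test above.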
@@ -1580,37 +1594,36 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     if (gcstate->phase == GC_PHASE_MARK) {
         Py_ssize_t objects_marked = mark_at_start(tstate);
         GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
-        gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
+        gcstate->work_to_do -= objects_marked;
         validate_spaces(gcstate);
         return;
     }
     PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
-    intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
-    GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
-    gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
-    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
     PyGC_Head increment;
     gc_list_init(&increment);
-    WorkStack working;
-    working.top = 0;
-    working.visited_space = gcstate->visited_space;
-    move_list_to_stack(&gcstate->young.head, &working);
-    Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
+    int scale_factor = gcstate->old[0].threshold;
+    if (scale_factor < 2) {
+        scale_factor = 2;
+    }
+    intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
+    GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
+    gcstate->work_to_do -= objects_marked;
+    gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
+    gc_list_merge(&gcstate->young.head, &increment);
     gc_list_validate_space(&increment, gcstate->visited_space);
-    assert(working.top == NULL);
+    Py_ssize_t increment_size = gc_list_size(&increment);
     while (increment_size < gcstate->work_to_do) {
         if (gc_list_is_empty(not_visited)) {
             break;
         }
         PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
-        gc_set_old_space(gc, gcstate->visited_space);
-        push_to_stack(gc, &working);
+        gc_list_move(gc, &increment);
+        increment_size++;
         assert(!_Py_IsImmortal(FROM_GC(gc)));
-        increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
-        assert(working.top == NULL);
+        gc_set_old_space(gc, gcstate->visited_space);
+        increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
     }
-    assert(increment_size == gc_list_size(&increment));
     GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
     validate_list(&increment, collecting_clear_unreachable_clear);
     gc_list_validate_space(&increment, gcstate->visited_space);
@@ -1619,6 +1632,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     gc_collect_region(tstate, &increment, &survivors, stats);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
+    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     gcstate->work_to_do -= increment_size;
 
     add_stats(gcstate, 1, stats);
@@ -1654,7 +1668,6 @@ gc_collect_full(PyThreadState *tstate,
     gcstate->old[0].count = 0;
     gcstate->old[1].count = 0;
     completed_scavenge(gcstate);
-    gcstate->work_to_do = -gcstate->young.threshold;
     _PyGC_ClearAllFreeLists(tstate->interp);
     validate_spaces(gcstate);
     add_stats(gcstate, 2, stats);