GH-117108: Set the "old space bit" to "visited" for all young objects (#117213)

Change old space bit of young objects from 0 to gcstate->visited_space.
This ensures that any object created *and* collected during cycle GC has the bit set correctly.
This commit is contained in:
Mark Shannon 2024-03-26 11:11:42 +00:00 committed by GitHub
parent bf82f77957
commit 8bef34f625
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 56 additions and 43 deletions

View file

@ -113,7 +113,19 @@ static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
/* Bit 1 is set when the object is in generation which is GCed currently. */ /* Bit 1 is set when the object is in generation which is GCed currently. */
#define _PyGC_PREV_MASK_COLLECTING 2 #define _PyGC_PREV_MASK_COLLECTING 2
/* Bit 0 is set if the object belongs to old space 1 */ /* Bit 0 in _gc_next is the old space bit.
* It is set as follows:
* Young: gcstate->visited_space
* old[0]: 0
* old[1]: 1
* permanent: 0
*
* During a collection all objects handled should have the bit set to
* gcstate->visited_space, as objects are moved from the young gen
* and the increment into old[gcstate->visited_space].
* When object are moved from the pending space, old[gcstate->visited_space^1]
* into the increment, the old space bit is flipped.
*/
#define _PyGC_NEXT_MASK_OLD_SPACE_1 1 #define _PyGC_NEXT_MASK_OLD_SPACE_1 1
#define _PyGC_PREV_SHIFT 2 #define _PyGC_PREV_SHIFT 2

View file

@ -318,8 +318,8 @@ static inline void _PyObject_GC_TRACK(
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last); _PyGCHead_SET_PREV(gc, last);
_PyGCHead_SET_NEXT(gc, generation0); /* Young objects will be moved into the visited space during GC, so set the bit here */
assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0); gc->_gc_next = ((uintptr_t)generation0) | interp->gc.visited_space;
generation0->_gc_prev = (uintptr_t)gc; generation0->_gc_prev = (uintptr_t)gc;
#endif #endif
} }

View file

@ -823,32 +823,10 @@ class GCTests(unittest.TestCase):
self.assertTrue( self.assertTrue(
any(l is element for element in gc.get_objects(generation=0)) any(l is element for element in gc.get_objects(generation=0))
) )
self.assertFalse( gc.collect()
any(l is element for element in gc.get_objects(generation=1))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=0)
self.assertFalse( self.assertFalse(
any(l is element for element in gc.get_objects(generation=0)) any(l is element for element in gc.get_objects(generation=0))
) )
self.assertTrue(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=2)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertTrue(
any(l is element for element in gc.get_objects(generation=2))
)
del l del l
gc.collect() gc.collect()

View file

@ -0,0 +1,3 @@
Change the old space bit of objects in the young generation from 0 to
gcstate->visited, so that any objects created during GC will have the old
bit set correctly if they get moved into the old generation.

View file

@ -455,10 +455,20 @@ validate_consistent_old_space(PyGC_Head *head)
assert(prev == GC_PREV(head)); assert(prev == GC_PREV(head));
} }
static void
gc_list_validate_space(PyGC_Head *head, int space) {
PyGC_Head *gc = GC_NEXT(head);
while (gc != head) {
assert(gc_old_space(gc) == space);
gc = GC_NEXT(gc);
}
}
#else #else
#define validate_list(x, y) do{}while(0) #define validate_list(x, y) do{}while(0)
#define validate_old(g) do{}while(0) #define validate_old(g) do{}while(0)
#define validate_consistent_old_space(l) do{}while(0) #define validate_consistent_old_space(l) do{}while(0)
#define gc_list_validate_space(l, s) do{}while(0)
#endif #endif
/*** end of list stuff ***/ /*** end of list stuff ***/
@ -949,6 +959,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
/* Invoke the callbacks we decided to honor. It's safe to invoke them /* Invoke the callbacks we decided to honor. It's safe to invoke them
* because they can't reference unreachable objects. * because they can't reference unreachable objects.
*/ */
int visited_space = get_gc_state()->visited_space;
while (! gc_list_is_empty(&wrcb_to_call)) { while (! gc_list_is_empty(&wrcb_to_call)) {
PyObject *temp; PyObject *temp;
PyObject *callback; PyObject *callback;
@ -983,6 +994,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
Py_DECREF(op); Py_DECREF(op);
if (wrcb_to_call._gc_next == (uintptr_t)gc) { if (wrcb_to_call._gc_next == (uintptr_t)gc) {
/* object is still alive -- move it */ /* object is still alive -- move it */
gc_set_old_space(gc, visited_space);
gc_list_move(gc, old); gc_list_move(gc, old);
} }
else { else {
@ -1389,6 +1401,14 @@ completed_cycle(GCState *gcstate)
assert(gc_list_is_empty(not_visited)); assert(gc_list_is_empty(not_visited));
#endif #endif
gcstate->visited_space = flip_old_space(gcstate->visited_space); gcstate->visited_space = flip_old_space(gcstate->visited_space);
/* Make sure all young objects have old space bit set correctly */
PyGC_Head *young = &gcstate->young.head;
PyGC_Head *gc = GC_NEXT(young);
while (gc != young) {
PyGC_Head *next = GC_NEXT(gc);
gc_set_old_space(gc, gcstate->visited_space);
gc = next;
}
gcstate->work_to_do = 0; gcstate->work_to_do = 0;
} }
@ -1406,10 +1426,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
} }
gc_list_merge(&gcstate->young.head, &increment); gc_list_merge(&gcstate->young.head, &increment);
gcstate->young.count = 0; gcstate->young.count = 0;
if (gcstate->visited_space) { gc_list_validate_space(&increment, gcstate->visited_space);
/* objects in visited space have bit set, so we set it here */
gc_list_set_space(&increment, 1);
}
Py_ssize_t increment_size = 0; Py_ssize_t increment_size = 0;
while (increment_size < gcstate->work_to_do) { while (increment_size < gcstate->work_to_do) {
if (gc_list_is_empty(not_visited)) { if (gc_list_is_empty(not_visited)) {
@ -1421,9 +1438,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
gc_set_old_space(gc, gcstate->visited_space); gc_set_old_space(gc, gcstate->visited_space);
increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
} }
gc_list_validate_space(&increment, gcstate->visited_space);
PyGC_Head survivors; PyGC_Head survivors;
gc_list_init(&survivors); gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
gc_list_validate_space(&survivors, gcstate->visited_space);
gc_list_merge(&survivors, visited); gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment)); assert(gc_list_is_empty(&increment));
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
@ -1444,23 +1463,18 @@ gc_collect_full(PyThreadState *tstate,
GCState *gcstate = &tstate->interp->gc; GCState *gcstate = &tstate->interp->gc;
validate_old(gcstate); validate_old(gcstate);
PyGC_Head *young = &gcstate->young.head; PyGC_Head *young = &gcstate->young.head;
PyGC_Head *old0 = &gcstate->old[0].head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *old1 = &gcstate->old[1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
/* merge all generations into old0 */ /* merge all generations into visited */
gc_list_merge(young, old0); gc_list_validate_space(young, gcstate->visited_space);
gc_list_set_space(pending, gcstate->visited_space);
gc_list_merge(young, pending);
gcstate->young.count = 0; gcstate->young.count = 0;
PyGC_Head *gc = GC_NEXT(old1); gc_list_merge(pending, visited);
while (gc != old1) {
PyGC_Head *next = GC_NEXT(gc);
gc_set_old_space(gc, 0);
gc = next;
}
gc_list_merge(old1, old0);
gc_collect_region(tstate, old0, old0, gc_collect_region(tstate, visited, visited,
UNTRACK_TUPLES | UNTRACK_DICTS, UNTRACK_TUPLES | UNTRACK_DICTS,
stats); stats);
gcstate->visited_space = 1;
gcstate->young.count = 0; gcstate->young.count = 0;
gcstate->old[0].count = 0; gcstate->old[0].count = 0;
gcstate->old[1].count = 0; gcstate->old[1].count = 0;
@ -1527,6 +1541,7 @@ gc_collect_region(PyThreadState *tstate,
/* Clear weakrefs and invoke callbacks as necessary. */ /* Clear weakrefs and invoke callbacks as necessary. */
stats->collected += handle_weakrefs(&unreachable, to); stats->collected += handle_weakrefs(&unreachable, to);
gc_list_validate_space(to, gcstate->visited_space);
validate_list(to, collecting_clear_unreachable_clear); validate_list(to, collecting_clear_unreachable_clear);
validate_list(&unreachable, collecting_set_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear);
@ -1560,6 +1575,7 @@ gc_collect_region(PyThreadState *tstate,
* this if they insist on creating this type of structure. * this if they insist on creating this type of structure.
*/ */
handle_legacy_finalizers(tstate, gcstate, &finalizers, to); handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
gc_list_validate_space(to, gcstate->visited_space);
validate_list(to, collecting_clear_unreachable_clear); validate_list(to, collecting_clear_unreachable_clear);
} }
@ -1708,6 +1724,10 @@ void
_PyGC_Freeze(PyInterpreterState *interp) _PyGC_Freeze(PyInterpreterState *interp)
{ {
GCState *gcstate = &interp->gc; GCState *gcstate = &interp->gc;
/* The permanent_generation has its old space bit set to zero */
if (gcstate->visited_space) {
gc_list_set_space(&gcstate->young.head, 0);
}
gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
gcstate->young.count = 0; gcstate->young.count = 0;
PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old0 = &gcstate->old[0].head;