mirror of
https://github.com/python/cpython.git
synced 2025-11-24 04:17:38 +00:00
[3.14] GH-139951: Fix major GC performance regression. Backport of GH-140262 (GH-140447)
* Count number of actually tracked objects, instead of trackable objects. This ensures that untracking tuples has the desired effect of reducing GC overhead. * Do not track most untrackable tuples during creation. This prevents large numbers of small tuples causing excessive GCs.
This commit is contained in:
parent
0fdae5f590
commit
d1a434f7b2
5 changed files with 94 additions and 36 deletions
|
|
@ -205,6 +205,12 @@ static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern void _Py_ScheduleGC(PyThreadState *tstate);
|
||||||
|
|
||||||
|
#ifndef Py_GIL_DISABLED
|
||||||
|
extern void _Py_TriggerGC(struct _gc_runtime_state *gcstate);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Tell the GC to track this object.
|
/* Tell the GC to track this object.
|
||||||
*
|
*
|
||||||
|
|
@ -238,14 +244,19 @@ static inline void _PyObject_GC_TRACK(
|
||||||
"object is in generation which is garbage collected",
|
"object is in generation which is garbage collected",
|
||||||
filename, lineno, __func__);
|
filename, lineno, __func__);
|
||||||
|
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
|
||||||
PyGC_Head *generation0 = &interp->gc.young.head;
|
PyGC_Head *generation0 = &gcstate->young.head;
|
||||||
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
|
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
|
||||||
_PyGCHead_SET_NEXT(last, gc);
|
_PyGCHead_SET_NEXT(last, gc);
|
||||||
_PyGCHead_SET_PREV(gc, last);
|
_PyGCHead_SET_PREV(gc, last);
|
||||||
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
|
uintptr_t not_visited = 1 ^ gcstate->visited_space;
|
||||||
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
|
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
|
||||||
generation0->_gc_prev = (uintptr_t)gc;
|
generation0->_gc_prev = (uintptr_t)gc;
|
||||||
|
gcstate->young.count++; /* number of tracked GC objects */
|
||||||
|
gcstate->heap_size++;
|
||||||
|
if (gcstate->young.count > gcstate->young.threshold) {
|
||||||
|
_Py_TriggerGC(gcstate);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -280,6 +291,11 @@ static inline void _PyObject_GC_UNTRACK(
|
||||||
_PyGCHead_SET_PREV(next, prev);
|
_PyGCHead_SET_PREV(next, prev);
|
||||||
gc->_gc_next = 0;
|
gc->_gc_next = 0;
|
||||||
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
|
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
|
||||||
|
struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
|
||||||
|
if (gcstate->young.count > 0) {
|
||||||
|
gcstate->young.count--;
|
||||||
|
}
|
||||||
|
gcstate->heap_size--;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -343,7 +359,6 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);
|
||||||
|
|
||||||
// Functions to clear types free lists
|
// Functions to clear types free lists
|
||||||
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
|
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
|
||||||
extern void _Py_ScheduleGC(PyThreadState *tstate);
|
|
||||||
extern void _Py_RunGC(PyThreadState *tstate);
|
extern void _Py_RunGC(PyThreadState *tstate);
|
||||||
|
|
||||||
union _PyStackRef;
|
union _PyStackRef;
|
||||||
|
|
|
||||||
|
|
@ -1329,6 +1329,7 @@ class GCTogglingTests(unittest.TestCase):
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
gc.disable()
|
gc.disable()
|
||||||
|
|
||||||
|
@unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments")
|
||||||
def test_bug1055820c(self):
|
def test_bug1055820c(self):
|
||||||
# Corresponds to temp2c.py in the bug report. This is pretty
|
# Corresponds to temp2c.py in the bug report. This is pretty
|
||||||
# elaborate.
|
# elaborate.
|
||||||
|
|
@ -1390,10 +1391,11 @@ class GCTogglingTests(unittest.TestCase):
|
||||||
# The free-threaded build doesn't have multiple generations, so
|
# The free-threaded build doesn't have multiple generations, so
|
||||||
# just trigger a GC manually.
|
# just trigger a GC manually.
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
assert not detector.gc_happened
|
||||||
while not detector.gc_happened:
|
while not detector.gc_happened:
|
||||||
i += 1
|
i += 1
|
||||||
if i > 10000:
|
if i > 100000:
|
||||||
self.fail("gc didn't happen after 10000 iterations")
|
self.fail("gc didn't happen after 100000 iterations")
|
||||||
self.assertEqual(len(ouch), 0)
|
self.assertEqual(len(ouch), 0)
|
||||||
junk.append([]) # this will eventually trigger gc
|
junk.append([]) # this will eventually trigger gc
|
||||||
|
|
||||||
|
|
@ -1464,8 +1466,8 @@ class GCTogglingTests(unittest.TestCase):
|
||||||
gc.collect()
|
gc.collect()
|
||||||
while not detector.gc_happened:
|
while not detector.gc_happened:
|
||||||
i += 1
|
i += 1
|
||||||
if i > 10000:
|
if i > 50000:
|
||||||
self.fail("gc didn't happen after 10000 iterations")
|
self.fail("gc didn't happen after 50000 iterations")
|
||||||
self.assertEqual(len(ouch), 0)
|
self.assertEqual(len(ouch), 0)
|
||||||
junk.append([]) # this will eventually trigger gc
|
junk.append([]) # this will eventually trigger gc
|
||||||
|
|
||||||
|
|
@ -1482,8 +1484,8 @@ class GCTogglingTests(unittest.TestCase):
|
||||||
detector = GC_Detector()
|
detector = GC_Detector()
|
||||||
while not detector.gc_happened:
|
while not detector.gc_happened:
|
||||||
i += 1
|
i += 1
|
||||||
if i > 10000:
|
if i > 100000:
|
||||||
self.fail("gc didn't happen after 10000 iterations")
|
self.fail("gc didn't happen after 100000 iterations")
|
||||||
junk.append([]) # this will eventually trigger gc
|
junk.append([]) # this will eventually trigger gc
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -1493,11 +1495,11 @@ class GCTogglingTests(unittest.TestCase):
|
||||||
detector = GC_Detector()
|
detector = GC_Detector()
|
||||||
while not detector.gc_happened:
|
while not detector.gc_happened:
|
||||||
i += 1
|
i += 1
|
||||||
if i > 10000:
|
if i > 100000:
|
||||||
break
|
break
|
||||||
junk.append([]) # this may eventually trigger gc (if it is enabled)
|
junk.append([]) # this may eventually trigger gc (if it is enabled)
|
||||||
|
|
||||||
self.assertEqual(i, 10001)
|
self.assertEqual(i, 100001)
|
||||||
finally:
|
finally:
|
||||||
gc.enable()
|
gc.enable()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
Fixes a regression in GC performance for a growing heap composed mostly of
|
||||||
|
small tuples.
|
||||||
|
|
||||||
|
* Counts number of actually tracked objects, instead of trackable objects.
|
||||||
|
This ensures that untracking tuples has the desired effect of reducing GC overhead.
|
||||||
|
* Does not track most untrackable tuples during creation.
|
||||||
|
This prevents large numbers of small tuples causing excessive GCs.
|
||||||
|
|
@ -156,6 +156,18 @@ _PyTuple_MaybeUntrack(PyObject *op)
|
||||||
_PyObject_GC_UNTRACK(op);
|
_PyObject_GC_UNTRACK(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fast, but conservative check of whether an object may be tracked.
|
||||||
|
May return true for an object that is not tracked,
|
||||||
|
Will always return true for an object that is tracked.
|
||||||
|
This is a temporary workaround until _PyObject_GC_IS_TRACKED
|
||||||
|
becomes fast and safe to call on non-GC objects.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
maybe_tracked(PyObject *ob)
|
||||||
|
{
|
||||||
|
return _PyType_IS_GC(Py_TYPE(ob));
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyTuple_Pack(Py_ssize_t n, ...)
|
PyTuple_Pack(Py_ssize_t n, ...)
|
||||||
{
|
{
|
||||||
|
|
@ -163,6 +175,7 @@ PyTuple_Pack(Py_ssize_t n, ...)
|
||||||
PyObject *o;
|
PyObject *o;
|
||||||
PyObject **items;
|
PyObject **items;
|
||||||
va_list vargs;
|
va_list vargs;
|
||||||
|
bool track = false;
|
||||||
|
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
return tuple_get_empty();
|
return tuple_get_empty();
|
||||||
|
|
@ -177,10 +190,15 @@ PyTuple_Pack(Py_ssize_t n, ...)
|
||||||
items = result->ob_item;
|
items = result->ob_item;
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
o = va_arg(vargs, PyObject *);
|
o = va_arg(vargs, PyObject *);
|
||||||
|
if (!track && maybe_tracked(o)) {
|
||||||
|
track = true;
|
||||||
|
}
|
||||||
items[i] = Py_NewRef(o);
|
items[i] = Py_NewRef(o);
|
||||||
}
|
}
|
||||||
va_end(vargs);
|
va_end(vargs);
|
||||||
|
if (track) {
|
||||||
_PyObject_GC_TRACK(result);
|
_PyObject_GC_TRACK(result);
|
||||||
|
}
|
||||||
return (PyObject *)result;
|
return (PyObject *)result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -377,11 +395,17 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
PyObject **dst = tuple->ob_item;
|
PyObject **dst = tuple->ob_item;
|
||||||
|
bool track = false;
|
||||||
for (Py_ssize_t i = 0; i < n; i++) {
|
for (Py_ssize_t i = 0; i < n; i++) {
|
||||||
PyObject *item = src[i];
|
PyObject *item = src[i];
|
||||||
|
if (!track && maybe_tracked(item)) {
|
||||||
|
track = true;
|
||||||
|
}
|
||||||
dst[i] = Py_NewRef(item);
|
dst[i] = Py_NewRef(item);
|
||||||
}
|
}
|
||||||
|
if (track) {
|
||||||
_PyObject_GC_TRACK(tuple);
|
_PyObject_GC_TRACK(tuple);
|
||||||
|
}
|
||||||
return (PyObject *)tuple;
|
return (PyObject *)tuple;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -396,10 +420,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef *src, Py_ssize_t n)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
PyObject **dst = tuple->ob_item;
|
PyObject **dst = tuple->ob_item;
|
||||||
|
bool track = false;
|
||||||
for (Py_ssize_t i = 0; i < n; i++) {
|
for (Py_ssize_t i = 0; i < n; i++) {
|
||||||
dst[i] = PyStackRef_AsPyObjectSteal(src[i]);
|
PyObject *item = PyStackRef_AsPyObjectSteal(src[i]);
|
||||||
|
if (!track && maybe_tracked(item)) {
|
||||||
|
track = true;
|
||||||
}
|
}
|
||||||
|
dst[i] = item;
|
||||||
|
}
|
||||||
|
if (track) {
|
||||||
_PyObject_GC_TRACK(tuple);
|
_PyObject_GC_TRACK(tuple);
|
||||||
|
}
|
||||||
return (PyObject *)tuple;
|
return (PyObject *)tuple;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
43
Python/gc.c
43
Python/gc.c
|
|
@ -1590,7 +1590,7 @@ assess_work_to_do(GCState *gcstate)
|
||||||
scale_factor = 2;
|
scale_factor = 2;
|
||||||
}
|
}
|
||||||
intptr_t new_objects = gcstate->young.count;
|
intptr_t new_objects = gcstate->young.count;
|
||||||
intptr_t max_heap_fraction = new_objects*3/2;
|
intptr_t max_heap_fraction = new_objects*2;
|
||||||
intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
|
intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
|
||||||
if (heap_fraction > max_heap_fraction) {
|
if (heap_fraction > max_heap_fraction) {
|
||||||
heap_fraction = max_heap_fraction;
|
heap_fraction = max_heap_fraction;
|
||||||
|
|
@ -1605,6 +1605,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
|
||||||
GC_STAT_ADD(1, collections, 1);
|
GC_STAT_ADD(1, collections, 1);
|
||||||
GCState *gcstate = &tstate->interp->gc;
|
GCState *gcstate = &tstate->interp->gc;
|
||||||
gcstate->work_to_do += assess_work_to_do(gcstate);
|
gcstate->work_to_do += assess_work_to_do(gcstate);
|
||||||
|
if (gcstate->work_to_do < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
untrack_tuples(&gcstate->young.head);
|
untrack_tuples(&gcstate->young.head);
|
||||||
if (gcstate->phase == GC_PHASE_MARK) {
|
if (gcstate->phase == GC_PHASE_MARK) {
|
||||||
Py_ssize_t objects_marked = mark_at_start(tstate);
|
Py_ssize_t objects_marked = mark_at_start(tstate);
|
||||||
|
|
@ -1647,7 +1650,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
|
||||||
gc_collect_region(tstate, &increment, &survivors, stats);
|
gc_collect_region(tstate, &increment, &survivors, stats);
|
||||||
gc_list_merge(&survivors, visited);
|
gc_list_merge(&survivors, visited);
|
||||||
assert(gc_list_is_empty(&increment));
|
assert(gc_list_is_empty(&increment));
|
||||||
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
|
|
||||||
gcstate->work_to_do -= increment_size;
|
gcstate->work_to_do -= increment_size;
|
||||||
|
|
||||||
add_stats(gcstate, 1, stats);
|
add_stats(gcstate, 1, stats);
|
||||||
|
|
@ -2231,21 +2233,11 @@ _Py_ScheduleGC(PyThreadState *tstate)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyObject_GC_Link(PyObject *op)
|
_Py_TriggerGC(struct _gc_runtime_state *gcstate)
|
||||||
{
|
{
|
||||||
PyGC_Head *gc = AS_GC(op);
|
|
||||||
// gc must be correctly aligned
|
|
||||||
_PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
|
|
||||||
|
|
||||||
PyThreadState *tstate = _PyThreadState_GET();
|
PyThreadState *tstate = _PyThreadState_GET();
|
||||||
GCState *gcstate = &tstate->interp->gc;
|
if (gcstate->enabled &&
|
||||||
gc->_gc_next = 0;
|
gcstate->young.threshold != 0 &&
|
||||||
gc->_gc_prev = 0;
|
|
||||||
gcstate->young.count++; /* number of allocated GC objects */
|
|
||||||
gcstate->heap_size++;
|
|
||||||
if (gcstate->young.count > gcstate->young.threshold &&
|
|
||||||
gcstate->enabled &&
|
|
||||||
gcstate->young.threshold &&
|
|
||||||
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
|
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
|
||||||
!_PyErr_Occurred(tstate))
|
!_PyErr_Occurred(tstate))
|
||||||
{
|
{
|
||||||
|
|
@ -2253,6 +2245,17 @@ _PyObject_GC_Link(PyObject *op)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_PyObject_GC_Link(PyObject *op)
|
||||||
|
{
|
||||||
|
PyGC_Head *gc = AS_GC(op);
|
||||||
|
// gc must be correctly aligned
|
||||||
|
_PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
|
||||||
|
gc->_gc_next = 0;
|
||||||
|
gc->_gc_prev = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
_Py_RunGC(PyThreadState *tstate)
|
_Py_RunGC(PyThreadState *tstate)
|
||||||
{
|
{
|
||||||
|
|
@ -2359,6 +2362,11 @@ PyObject_GC_Del(void *op)
|
||||||
PyGC_Head *g = AS_GC(op);
|
PyGC_Head *g = AS_GC(op);
|
||||||
if (_PyObject_GC_IS_TRACKED(op)) {
|
if (_PyObject_GC_IS_TRACKED(op)) {
|
||||||
gc_list_remove(g);
|
gc_list_remove(g);
|
||||||
|
GCState *gcstate = get_gc_state();
|
||||||
|
if (gcstate->young.count > 0) {
|
||||||
|
gcstate->young.count--;
|
||||||
|
}
|
||||||
|
gcstate->heap_size--;
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
PyObject *exc = PyErr_GetRaisedException();
|
PyObject *exc = PyErr_GetRaisedException();
|
||||||
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
|
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
|
||||||
|
|
@ -2372,11 +2380,6 @@ PyObject_GC_Del(void *op)
|
||||||
PyErr_SetRaisedException(exc);
|
PyErr_SetRaisedException(exc);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
GCState *gcstate = get_gc_state();
|
|
||||||
if (gcstate->young.count > 0) {
|
|
||||||
gcstate->young.count--;
|
|
||||||
}
|
|
||||||
gcstate->heap_size--;
|
|
||||||
PyObject_Free(((char *)op)-presize);
|
PyObject_Free(((char *)op)-presize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue