bpo-30860: Consolidate stateful runtime globals. (#3397)

* group the (stateful) runtime globals into various topical structs
* consolidate the topical structs under a single top-level _PyRuntimeState struct
* add a check-c-globals.py script that helps identify runtime globals

Other globals are excluded (see globals.txt and check-c-globals.py).
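A minimal sketch of the resulting layout, assuming the shape implied by the commit message and by the _PyRuntime.gc.* accesses in the diff below (the typedef presumably lives in internal/pystate.h, which this file now includes; members other than gc are illustrative placeholders):

    /* Topical structs consolidated under a single top-level struct. */
    typedef struct pyruntimestate {
        struct _gc_runtime_state gc;
        /* ... other topical structs (ceval, mem, warnings, ...) ... */
    } _PyRuntimeState;

    /* The one global instance that the code below reads and writes. */
    extern _PyRuntimeState _PyRuntime;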
Eric Snow 2017-09-07 23:51:28 -06:00 committed by GitHub
parent bab21faded
commit 2ebc5ce42a
72 changed files with 2746 additions and 1312 deletions

Modules/gcmodule.c

@@ -24,6 +24,8 @@
*/
#include "Python.h"
#include "internal/mem.h"
#include "internal/pystate.h"
#include "frameobject.h" /* for PyFrame_ClearFreeList */
#include "pydtrace.h"
#include "pytime.h" /* for _PyTime_GetMonotonicClock() */
@@ -39,133 +41,9 @@ module gc
/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
/*** Global GC state ***/
struct gc_generation {
PyGC_Head head;
int threshold; /* collection threshold */
int count; /* count of allocations or collections of younger
generations */
};
/* If we change this, we need to change the default value in the signature of
gc.collect. */
#define NUM_GENERATIONS 3
#define GEN_HEAD(n) (&generations[n].head)
/* linked lists of container objects */
static struct gc_generation generations[NUM_GENERATIONS] = {
/* PyGC_Head, threshold, count */
{{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
{{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
{{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
};
PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
static int enabled = 1; /* automatic collection enabled? */
/* true if we are currently running the collector */
static int collecting = 0;
/* list of uncollectable objects */
static PyObject *garbage = NULL;
/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;
/* a list of callbacks to be invoked when collection is performed */
static PyObject *callbacks = NULL;
/* This is the number of objects that survived the last full collection. It
approximates the number of long lived objects tracked by the GC.
(by "full collection", we mean a collection of the oldest generation).
*/
static Py_ssize_t long_lived_total = 0;
/* This is the number of objects that survived all "non-full" collections,
and are waiting to undergo a full collection for the first time.
*/
static Py_ssize_t long_lived_pending = 0;
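/* Illustrative sketch: where the statics above end up. The real struct
   is added by this commit in an internal header (see the new includes
   at the top of the file); the fields here are inferred from the
   _PyRuntime.gc.* accesses later in this diff, so the actual layout
   may differ.

   struct _gc_runtime_state {
       int enabled;
       int collecting;
       PyObject *garbage;
       PyObject *callbacks;
       int debug;
       struct gc_generation generations[NUM_GENERATIONS];
       PyGC_Head *generation0;
       struct gc_generation_stats generation_stats[NUM_GENERATIONS];
       Py_ssize_t long_lived_total;
       Py_ssize_t long_lived_pending;
   };
*/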
/*
NOTE: about the counting of long-lived objects.
To limit the cost of garbage collection, there are two strategies:
- make each collection faster, e.g. by scanning fewer objects
- do fewer collections
This heuristic is about the latter strategy.
In addition to the various configurable thresholds, we only trigger a
full collection if the ratio
long_lived_pending / long_lived_total
is above a given value (hardwired to 25%).
The reason is that, while "non-full" collections (i.e., collections of
the young and middle generations) will always examine roughly the same
number of objects -- determined by the aforementioned thresholds --,
the cost of a full collection is proportional to the total number of
long-lived objects, which is virtually unbounded.
Indeed, it has been remarked that doing a full collection every
<constant number> of object creations entails a dramatic performance
degradation in workloads which consist of creating and storing lots of
long-lived objects (e.g. building a large list of GC-tracked objects would
show quadratic performance, instead of linear as expected: see issue #4074).
Using the above ratio, instead, yields amortized linear performance in
the total number of objects (the effect of which can be summarized
thusly: "each full garbage collection is more and more costly as the
number of objects grows, but we do fewer and fewer of them").
This heuristic was suggested by Martin von Löwis on python-dev in
June 2008. His original analysis and proposal can be found at:
http://mail.python.org/pipermail/python-dev/2008-June/080579.html
*/
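/* Illustrative arithmetic for the 25% rule above: with
   long_lived_total == 10000, full collections are skipped until
   long_lived_pending exceeds 10000 / 4 == 2500, i.e. until the
   survivors awaiting their first full collection amount to a quarter
   of the tracked long-lived objects. This is exactly the check
   collect_generations() performs further down in this file. */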
/*
NOTE: about untracking of mutable objects.
Certain types of container cannot participate in a reference cycle, and
so do not need to be tracked by the garbage collector. Untracking these
objects reduces the cost of garbage collections. However, determining
which objects may be untracked is not free, and the costs must be
weighed against the benefits for garbage collection.
There are two possible strategies for when to untrack a container:
i) When the container is created.
ii) When the container is examined by the garbage collector.
Tuples containing only immutable objects (integers, strings etc, and
recursively, tuples of immutable objects) do not need to be tracked.
The interpreter creates a large number of tuples, many of which will
not survive until garbage collection. It is therefore not worthwhile
to untrack eligible tuples at creation time.
Instead, all tuples except the empty tuple are tracked when created.
During garbage collection it is determined whether any surviving tuples
can be untracked. A tuple can be untracked if all of its contents are
already not tracked. Tuples are examined for untracking in all garbage
collection cycles. It may take more than one cycle to untrack a tuple.
Dictionaries containing only immutable objects also do not need to be
tracked. Dictionaries are untracked when created. If a tracked item is
inserted into a dictionary (either as a key or value), the dictionary
becomes tracked. During a full garbage collection (all generations),
the collector will untrack any dictionaries whose contents are not
tracked.
The module provides the Python function is_tracked(obj), which returns
the CURRENT tracking status of the object. Subsequent garbage
collections may change the tracking status of the object.
Untracking of certain containers was introduced in issue #4688, and
the algorithm was refined in response to issue #14775.
*/
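/* Illustrative sketch of the dict rule described above, simplified
   from the insertion path in dictobject.c (treat the helper names as
   assumptions, not code from this commit): an untracked dict becomes
   tracked as soon as a key or value that may itself be tracked is
   stored into it.

   if (!_PyObject_GC_IS_TRACKED(mp) &&
           (_PyObject_GC_MAY_BE_TRACKED(key) ||
            _PyObject_GC_MAY_BE_TRACKED(value))) {
       _PyObject_GC_TRACK(mp);
   }
*/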
/* set for debugging information */
#define DEBUG_STATS (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
@@ -174,19 +52,26 @@ static Py_ssize_t long_lived_pending = 0;
#define DEBUG_LEAK DEBUG_COLLECTABLE | \
DEBUG_UNCOLLECTABLE | \
DEBUG_SAVEALL
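/* These flags combine as a bitmask; Python code sets them through
   gc.set_debug(), e.g. gc.set_debug(gc.DEBUG_LEAK) to enable the three
   flags that DEBUG_LEAK ORs together. */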
static int debug;
/* Running stats per generation */
struct gc_generation_stats {
/* total number of collections */
Py_ssize_t collections;
/* total number of collected objects */
Py_ssize_t collected;
/* total number of uncollectable objects (put into gc.garbage) */
Py_ssize_t uncollectable;
};
#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)
static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
void
_PyGC_Initialize(struct _gc_runtime_state *state)
{
state->enabled = 1; /* automatic collection enabled? */
#define _GEN_HEAD(n) (&state->generations[n].head)
struct gc_generation generations[NUM_GENERATIONS] = {
/* PyGC_Head, threshold, count */
{{{_GEN_HEAD(0), _GEN_HEAD(0), 0}}, 700, 0},
{{{_GEN_HEAD(1), _GEN_HEAD(1), 0}}, 10, 0},
{{{_GEN_HEAD(2), _GEN_HEAD(2), 0}}, 10, 0},
};
for (int i = 0; i < NUM_GENERATIONS; i++) {
state->generations[i] = generations[i];
};
state->generation0 = GEN_HEAD(0);
}
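/* Sketch of the expected call site (assumed wiring; only the
   _PyGC_Initialize signature above is confirmed by this diff):

   void
   _PyRuntimeState_Init(_PyRuntimeState *runtime)
   {
       memset(runtime, 0, sizeof(*runtime));
       _PyGC_Initialize(&runtime->gc);
       ... initialize the other topical structs ...
   }

   Note that state->generation0 is assigned via GEN_HEAD(0), which now
   expands to &_PyRuntime.gc.generations[0].head rather than using the
   local _GEN_HEAD macro, so the initializer implicitly assumes
   state == &_PyRuntime.gc. */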
/*--------------------------------------------------------------------------
gc_refs values.
@@ -766,16 +651,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
{
PyGC_Head *gc = finalizers->gc.gc_next;
if (garbage == NULL) {
garbage = PyList_New(0);
if (garbage == NULL)
if (_PyRuntime.gc.garbage == NULL) {
_PyRuntime.gc.garbage = PyList_New(0);
if (_PyRuntime.gc.garbage == NULL)
Py_FatalError("gc couldn't create gc.garbage list");
}
for (; gc != finalizers; gc = gc->gc.gc_next) {
PyObject *op = FROM_GC(gc);
if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
if (PyList_Append(garbage, op) < 0)
if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
return -1;
}
}
@@ -865,8 +750,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
PyGC_Head *gc = collectable->gc.gc_next;
PyObject *op = FROM_GC(gc);
if (debug & DEBUG_SAVEALL) {
PyList_Append(garbage, op);
if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
PyList_Append(_PyRuntime.gc.garbage, op);
}
else {
if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@@ -919,9 +804,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
PyGC_Head *gc;
_PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
struct gc_generation_stats *stats = &generation_stats[generation];
struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
if (debug & DEBUG_STATS) {
if (_PyRuntime.gc.debug & DEBUG_STATS) {
PySys_WriteStderr("gc: collecting generation %d...\n",
generation);
PySys_WriteStderr("gc: objects in each generation:");
@@ -938,9 +823,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
/* update collection and allocation counters */
if (generation+1 < NUM_GENERATIONS)
generations[generation+1].count += 1;
_PyRuntime.gc.generations[generation+1].count += 1;
for (i = 0; i <= generation; i++)
generations[i].count = 0;
_PyRuntime.gc.generations[i].count = 0;
/* merge younger generations with one we are currently collecting */
for (i = 0; i < generation; i++) {
@@ -974,7 +859,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
/* Move reachable objects to next generation. */
if (young != old) {
if (generation == NUM_GENERATIONS - 2) {
long_lived_pending += gc_list_size(young);
_PyRuntime.gc.long_lived_pending += gc_list_size(young);
}
gc_list_merge(young, old);
}
@@ -982,8 +867,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
/* We only untrack dicts in full collections, to avoid quadratic
dict build-up. See issue #14775. */
untrack_dicts(young);
long_lived_pending = 0;
long_lived_total = gc_list_size(young);
_PyRuntime.gc.long_lived_pending = 0;
_PyRuntime.gc.long_lived_total = gc_list_size(young);
}
/* All objects in unreachable are trash, but objects reachable from
@@ -1003,7 +888,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
for (gc = unreachable.gc.gc_next; gc != &unreachable;
gc = gc->gc.gc_next) {
m++;
if (debug & DEBUG_COLLECTABLE) {
if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
debug_cycle("collectable", FROM_GC(gc));
}
}
@@ -1032,10 +917,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
gc != &finalizers;
gc = gc->gc.gc_next) {
n++;
if (debug & DEBUG_UNCOLLECTABLE)
if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
debug_cycle("uncollectable", FROM_GC(gc));
}
if (debug & DEBUG_STATS) {
if (_PyRuntime.gc.debug & DEBUG_STATS) {
_PyTime_t t2 = _PyTime_GetMonotonicClock();
if (m == 0 && n == 0)
@@ -1098,11 +983,11 @@ invoke_gc_callback(const char *phase, int generation,
PyObject *info = NULL;
/* we may get called very early */
if (callbacks == NULL)
if (_PyRuntime.gc.callbacks == NULL)
return;
/* The local variable cannot be rebound, check it for sanity */
assert(callbacks != NULL && PyList_CheckExact(callbacks));
if (PyList_GET_SIZE(callbacks) != 0) {
assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
info = Py_BuildValue("{sisnsn}",
"generation", generation,
"collected", collected,
@@ -1112,8 +997,8 @@ invoke_gc_callback(const char *phase, int generation,
return;
}
}
for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
Py_INCREF(cb); /* make sure cb doesn't go away */
r = PyObject_CallFunction(cb, "sO", phase, info);
Py_XDECREF(r);
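/* Usage sketch (an assumption, not shown in this diff): C code could
   register a callback by appending a callable to the shared list; each
   collection then invokes it as cb(phase, info), where phase is
   "start" or "stop".

   if (PyList_Append(_PyRuntime.gc.callbacks, my_callable) < 0) {
       ... handle the error ...
   }
*/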
@@ -1147,13 +1032,13 @@ collect_generations(void)
* exceeds the threshold. Objects in that generation and
* generations younger than it will be collected. */
for (i = NUM_GENERATIONS-1; i >= 0; i--) {
if (generations[i].count > generations[i].threshold) {
if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
/* Avoid quadratic performance degradation in number
of tracked objects. See comments at the beginning
of this file, and issue #4074.
*/
if (i == NUM_GENERATIONS - 1
&& long_lived_pending < long_lived_total / 4)
&& _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4)
continue;
n = collect_with_callback(i);
break;
@@ -1174,7 +1059,7 @@ static PyObject *
gc_enable_impl(PyObject *module)
/*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
{
enabled = 1;
_PyRuntime.gc.enabled = 1;
Py_RETURN_NONE;
}
@@ -1188,7 +1073,7 @@ static PyObject *
gc_disable_impl(PyObject *module)
/*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
{
enabled = 0;
_PyRuntime.gc.enabled = 0;
Py_RETURN_NONE;
}
@@ -1202,7 +1087,7 @@ static int
gc_isenabled_impl(PyObject *module)
/*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
{
return enabled;
return _PyRuntime.gc.enabled;
}
/*[clinic input]
@@ -1230,12 +1115,12 @@ gc_collect_impl(PyObject *module, int generation)
return -1;
}
if (collecting)
if (_PyRuntime.gc.collecting)
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
_PyRuntime.gc.collecting = 1;
n = collect_with_callback(generation);
collecting = 0;
_PyRuntime.gc.collecting = 0;
}
return n;
@@ -1263,7 +1148,7 @@ static PyObject *
gc_set_debug_impl(PyObject *module, int flags)
/*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
{
debug = flags;
_PyRuntime.gc.debug = flags;
Py_RETURN_NONE;
}
@@ -1278,7 +1163,7 @@ static int
gc_get_debug_impl(PyObject *module)
/*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
{
return debug;
return _PyRuntime.gc.debug;
}
PyDoc_STRVAR(gc_set_thresh__doc__,
@@ -1292,13 +1177,13 @@ gc_set_thresh(PyObject *self, PyObject *args)
{
int i;
if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
&generations[0].threshold,
&generations[1].threshold,
&generations[2].threshold))
&_PyRuntime.gc.generations[0].threshold,
&_PyRuntime.gc.generations[1].threshold,
&_PyRuntime.gc.generations[2].threshold))
return NULL;
for (i = 2; i < NUM_GENERATIONS; i++) {
/* generations higher than 2 get the same threshold */
generations[i].threshold = generations[2].threshold;
_PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold;
}
Py_RETURN_NONE;
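/* Note the parsing trick above: the "i|ii" format writes the
   gc.set_threshold() arguments straight into the consolidated runtime
   fields, so gc.set_threshold(700, 10, 10) updates
   _PyRuntime.gc.generations[0..2].threshold in place; the loop above
   then copies the last threshold to any generation beyond the second. */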
@@ -1315,9 +1200,9 @@ gc_get_threshold_impl(PyObject *module)
/*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
{
return Py_BuildValue("(iii)",
generations[0].threshold,
generations[1].threshold,
generations[2].threshold);
_PyRuntime.gc.generations[0].threshold,
_PyRuntime.gc.generations[1].threshold,
_PyRuntime.gc.generations[2].threshold);
}
/*[clinic input]
@@ -1331,9 +1216,9 @@ gc_get_count_impl(PyObject *module)
/*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
{
return Py_BuildValue("(iii)",
generations[0].count,
generations[1].count,
generations[2].count);
_PyRuntime.gc.generations[0].count,
_PyRuntime.gc.generations[1].count,
_PyRuntime.gc.generations[2].count);
}
static int
@@ -1464,7 +1349,7 @@ gc_get_stats_impl(PyObject *module)
/* To get consistent values despite allocations while constructing
the result list, we use a snapshot of the running stats. */
for (i = 0; i < NUM_GENERATIONS; i++) {
stats[i] = generation_stats[i];
stats[i] = _PyRuntime.gc.generation_stats[i];
}
result = PyList_New(0);
@@ -1581,22 +1466,22 @@ PyInit_gc(void)
if (m == NULL)
return NULL;
if (garbage == NULL) {
garbage = PyList_New(0);
if (garbage == NULL)
if (_PyRuntime.gc.garbage == NULL) {
_PyRuntime.gc.garbage = PyList_New(0);
if (_PyRuntime.gc.garbage == NULL)
return NULL;
}
Py_INCREF(garbage);
if (PyModule_AddObject(m, "garbage", garbage) < 0)
Py_INCREF(_PyRuntime.gc.garbage);
if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0)
return NULL;
if (callbacks == NULL) {
callbacks = PyList_New(0);
if (callbacks == NULL)
if (_PyRuntime.gc.callbacks == NULL) {
_PyRuntime.gc.callbacks = PyList_New(0);
if (_PyRuntime.gc.callbacks == NULL)
return NULL;
}
Py_INCREF(callbacks);
if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
Py_INCREF(_PyRuntime.gc.callbacks);
if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0)
return NULL;
#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
@@ -1615,12 +1500,12 @@ PyGC_Collect(void)
{
Py_ssize_t n;
if (collecting)
if (_PyRuntime.gc.collecting)
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
_PyRuntime.gc.collecting = 1;
n = collect_with_callback(NUM_GENERATIONS - 1);
collecting = 0;
_PyRuntime.gc.collecting = 0;
}
return n;
@@ -1629,7 +1514,7 @@ PyGC_Collect(void)
Py_ssize_t
_PyGC_CollectIfEnabled(void)
{
if (!enabled)
if (!_PyRuntime.gc.enabled)
return 0;
return PyGC_Collect();
@@ -1646,12 +1531,12 @@ _PyGC_CollectNoFail(void)
during interpreter shutdown (and then never finish it).
See http://bugs.python.org/issue8713#msg195178 for an example.
*/
if (collecting)
if (_PyRuntime.gc.collecting)
n = 0;
else {
collecting = 1;
_PyRuntime.gc.collecting = 1;
n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
collecting = 0;
_PyRuntime.gc.collecting = 0;
}
return n;
}
@@ -1659,10 +1544,10 @@ _PyGC_CollectNoFail(void)
void
_PyGC_DumpShutdownStats(void)
{
if (!(debug & DEBUG_SAVEALL)
&& garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL)
&& _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) {
char *message;
if (debug & DEBUG_UNCOLLECTABLE)
if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
message = "gc: %zd uncollectable objects at " \
"shutdown";
else
@@ -1673,13 +1558,13 @@ _PyGC_DumpShutdownStats(void)
already. */
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
"gc", NULL, message,
PyList_GET_SIZE(garbage)))
PyList_GET_SIZE(_PyRuntime.gc.garbage)))
PyErr_WriteUnraisable(NULL);
if (debug & DEBUG_UNCOLLECTABLE) {
if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) {
PyObject *repr = NULL, *bytes = NULL;
repr = PyObject_Repr(garbage);
repr = PyObject_Repr(_PyRuntime.gc.garbage);
if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
PyErr_WriteUnraisable(garbage);
PyErr_WriteUnraisable(_PyRuntime.gc.garbage);
else {
PySys_WriteStderr(
" %s\n",
@@ -1695,7 +1580,7 @@ _PyGC_DumpShutdownStats(void)
void
_PyGC_Fini(void)
{
Py_CLEAR(callbacks);
Py_CLEAR(_PyRuntime.gc.callbacks);
}
/* for debugging */
@@ -1746,15 +1631,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
return PyErr_NoMemory();
g->gc.gc_refs = 0;
_PyGCHead_SET_REFS(g, GC_UNTRACKED);
generations[0].count++; /* number of allocated GC objects */
if (generations[0].count > generations[0].threshold &&
enabled &&
generations[0].threshold &&
!collecting &&
_PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
_PyRuntime.gc.enabled &&
_PyRuntime.gc.generations[0].threshold &&
!_PyRuntime.gc.collecting &&
!PyErr_Occurred()) {
collecting = 1;
_PyRuntime.gc.collecting = 1;
collect_generations();
collecting = 0;
_PyRuntime.gc.collecting = 0;
}
op = FROM_GC(g);
return op;
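/* Worked example of the trigger above, with the default thresholds
   (700, 10, 10): every 700 net generation-0 allocations start a
   generation-0 collection; every 10th generation-0 collection also
   collects generation 1; and every 10th generation-1 collection
   considers a full collection, subject to the long-lived 25% ratio
   check in collect_generations(). */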
@@ -1819,8 +1704,8 @@ PyObject_GC_Del(void *op)
PyGC_Head *g = AS_GC(op);
if (IS_TRACKED(op))
gc_list_remove(g);
if (generations[0].count > 0) {
generations[0].count--;
if (_PyRuntime.gc.generations[0].count > 0) {
_PyRuntime.gc.generations[0].count--;
}
PyObject_FREE(g);
}