gh-112532: Use separate mimalloc heaps for GC objects (gh-113263)

* gh-112532: Use separate mimalloc heaps for GC objects

In `--disable-gil` builds, we now use four separate heaps in
anticipation of using mimalloc to find GC objects when the GIL is
disabled. To support this, we also make a few changes to mimalloc:

* `mi_heap_t` and `mi_tld_t` initialization is split from allocation.
  This allows us to have a `mi_tld_t` per-`PyThreadState`, which is
  important for keeping interpreters isolated, since the same OS thread
  may run in multiple interpreters (using different PyThreadStates).

* Heap abandoning (mi_heap_collect_ex) can now be called from a
  different thread than the one that created the heap. This is necessary
  because we may clear and delete the containing PyThreadStates from a
  different thread during finalization and after fork().

* Use enum instead of defines and guard mimalloc includes.

* The enum typedef will be convenient for future PRs that use the type.
* Guarding the mimalloc includes allows us to unconditionally include
  pycore_mimalloc.h from other header files that rely on things like
  `struct _mimalloc_thread_state`.

* Only define _mimalloc_thread_state in Py_GIL_DISABLED builds
This commit is contained in:
Sam Gross 2023-12-26 11:53:20 -05:00 committed by GitHub
parent 8f5b998706
commit acf3bcc886
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 163 additions and 25 deletions

View file

@ -1794,6 +1794,10 @@ finalize_interp_clear(PyThreadState *tstate)
}
finalize_interp_types(tstate->interp);
/* finalize_interp_types may allocate Python objects so we may need to
abandon mimalloc segments again */
_PyThreadState_ClearMimallocHeaps(tstate);
}

View file

@ -236,6 +236,8 @@ tstate_is_bound(PyThreadState *tstate)
static void bind_gilstate_tstate(PyThreadState *);
static void unbind_gilstate_tstate(PyThreadState *);
static void tstate_mimalloc_bind(PyThreadState *);
static void
bind_tstate(PyThreadState *tstate)
{
@ -256,6 +258,9 @@ bind_tstate(PyThreadState *tstate)
tstate->native_thread_id = PyThread_get_thread_native_id();
#endif
// mimalloc state needs to be initialized from the active thread.
tstate_mimalloc_bind(tstate);
tstate->_status.bound = 1;
}
@ -1533,6 +1538,8 @@ PyThreadState_Clear(PyThreadState *tstate)
tstate->on_delete(tstate->on_delete_data);
}
_PyThreadState_ClearMimallocHeaps(tstate);
tstate->_status.cleared = 1;
// XXX Call _PyThreadStateSwap(runtime, NULL) here if "current".
@ -2509,3 +2516,51 @@ _PyThreadState_MustExit(PyThreadState *tstate)
}
return 1;
}
/********************/
/* mimalloc support */
/********************/
/* Bind the mimalloc thread-local state to this PyThreadState.
 *
 * Must run on the thread that will actually use the state: mimalloc's
 * thread-local data (tld) is initialized here rather than lazily. No-op
 * in builds with the GIL enabled.
 */
static void
tstate_mimalloc_bind(PyThreadState *tstate)
{
#ifdef Py_GIL_DISABLED
    struct _mimalloc_thread_state *mts = &((_PyThreadStateImpl*)tstate)->mimalloc;

    /* The "mem" heap doubles as the "backing" heap for the thread-local
       data. */
    _mi_tld_init(&mts->tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);

    /* Initialize every heap; all of them share the same tld. */
    for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
        _mi_heap_init_ex(&mts->heaps[i], &mts->tld, _mi_arena_id_none());
    }

    /* Object allocations default to _Py_MIMALLOC_HEAP_OBJECT.
       _PyObject_GC_New() and similar functions temporarily override this
       to point at one of the GC heaps. */
    mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT];
#endif
}
/* Release this thread state's mimalloc heaps by abandoning their
 * segments.
 *
 * Abandoned segments are pushed to a shared pool where other threads can
 * reclaim them later. Doing this before the thread state is destroyed
 * keeps the objects in those segments visible to the GC. No-op in builds
 * with the GIL enabled, or when the thread state was never bound
 * (binding is what initializes the heaps).
 */
void
_PyThreadState_ClearMimallocHeaps(PyThreadState *tstate)
{
#ifdef Py_GIL_DISABLED
    if (!tstate->_status.bound) {
        /* Heaps were never initialized; nothing to abandon. */
        return;
    }

    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
    for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
        _mi_heap_collect_abandon(&ts->mimalloc.heaps[i]);
    }
#endif
}