mirror of
https://github.com/python/cpython.git
synced 2025-07-19 01:05:26 +00:00
gh-115103: Delay reuse of mimalloc pages that store PyObjects (#115435)
This implements the delayed reuse of mimalloc pages that contain Python objects in the free-threaded build. Allocations of the same size class are grouped in data structures called pages. These are different from operating system pages. For thread-safety, we want to ensure that memory used to store PyObjects remains valid as long as there may be concurrent lock-free readers; we want to delay using it for other size classes, in other heaps, or returning it to the operating system. When a mimalloc page becomes empty, instead of immediately freeing it, we tag it with a QSBR goal and insert it into a per-thread state linked list of pages to be freed. When mimalloc needs a fresh page, we process the queue and free any still empty pages that are now deemed safe to be freed. Pages waiting to be freed are still available for allocations of the same size class, and allocating from a page prevents it from being freed. There is additional logic to handle abandoned pages when threads exit.
This commit is contained in:
parent
02ee475ee3
commit
c012c8ab7b
9 changed files with 199 additions and 17 deletions
|
@ -2839,6 +2839,7 @@ tstate_mimalloc_bind(PyThreadState *tstate)
|
|||
// the "backing" heap.
|
||||
mi_tld_t *tld = &mts->tld;
|
||||
_mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);
|
||||
llist_init(&mts->page_list);
|
||||
|
||||
// Exiting threads push any remaining in-use segments to the abandoned
|
||||
// pool to be re-claimed later by other threads. We use per-interpreter
|
||||
|
@ -2865,6 +2866,12 @@ tstate_mimalloc_bind(PyThreadState *tstate)
|
|||
mts->heaps[i].debug_offset = (uint8_t)debug_offsets[i];
|
||||
}
|
||||
|
||||
// Heaps that store Python objects should use QSBR to delay freeing
|
||||
// mimalloc pages while there may be concurrent lock-free readers.
|
||||
mts->heaps[_Py_MIMALLOC_HEAP_OBJECT].page_use_qsbr = true;
|
||||
mts->heaps[_Py_MIMALLOC_HEAP_GC].page_use_qsbr = true;
|
||||
mts->heaps[_Py_MIMALLOC_HEAP_GC_PRE].page_use_qsbr = true;
|
||||
|
||||
// By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT.
|
||||
// _PyObject_GC_New() and similar functions temporarily override this to
|
||||
// use one of the GC heaps.
|
||||
|
|
|
@ -38,12 +38,6 @@
|
|||
#include "pycore_pystate.h" // _PyThreadState_GET()
|
||||
|
||||
|
||||
// Wrap-around safe comparison. This is a holdover from the FreeBSD
|
||||
// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
|
||||
// sequence numbers, so wrap-around is unlikely.
|
||||
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
|
||||
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
|
||||
|
||||
// Starting size of the array of qsbr thread states
|
||||
#define MIN_ARRAY_SIZE 8
|
||||
|
||||
|
@ -167,13 +161,11 @@ bool
|
|||
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
|
||||
{
|
||||
assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);
|
||||
|
||||
uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
|
||||
if (QSBR_LEQ(goal, rd_seq)) {
|
||||
if (_Py_qbsr_goal_reached(qsbr, goal)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
rd_seq = qsbr_poll_scan(qsbr->shared);
|
||||
uint64_t rd_seq = qsbr_poll_scan(qsbr->shared);
|
||||
return QSBR_LEQ(goal, rd_seq);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue