mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00

The free threading build uses QSBR to delay the freeing of dictionary keys and list arrays when the objects are accessed by multiple threads in order to allow concurrent reads to proceed without holding the object lock. The requests are processed in batches to reduce execution overhead, but for large memory blocks this can lead to excess memory usage. Take into account the size of the memory block when deciding when to process QSBR requests. Also track the amount of memory being held by QSBR for mimalloc pages. Advance the write sequence if this memory exceeds a limit. Advancing the sequence will allow it to be freed more quickly. Process the held QSBR items from the "eval breaker", rather than from `_PyMem_FreeDelayed()`. This gives a higher chance that the global read sequence has advanced enough so that items can be freed. Co-authored-by: Sam Gross <colesbury@gmail.com>
172 lines
5.4 KiB
C
172 lines
5.4 KiB
C
// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
|
|
// the free-threaded build to safely reclaim memory when there may be
|
|
// concurrent accesses.
|
|
//
|
|
// Many operations in the free-threaded build are protected by locks. However,
|
|
// in some cases, we want to allow reads to happen concurrently with updates.
|
|
// In this case, we need to delay freeing ("reclaiming") any memory that may be
|
|
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
|
|
#ifndef Py_INTERNAL_QSBR_H
|
|
#define Py_INTERNAL_QSBR_H
|
|
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifndef Py_BUILD_CORE
|
|
# error "this header requires Py_BUILD_CORE define"
|
|
#endif
|
|
|
|
// The shared write sequence is always odd and is incremented by two. A
// detached thread is indicated by a read sequence of zero. Because zero is
// even, the offline state can never collide with a valid sequence number,
// even if the sequence numbers wrap around.
#define QSBR_OFFLINE 0
#define QSBR_INITIAL 1
#define QSBR_INCR 2
|
|
|
|
// Wrap-around safe comparison. This is a holdover from the FreeBSD
// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
// sequence numbers, so wrap-around is unlikely.
//
// Both operands are converted to uint64_t before subtracting so that the
// difference is always computed with well-defined unsigned (modulo 2^64)
// arithmetic, regardless of the type of the arguments. The difference is then
// reinterpreted as a signed 64-bit value: a negative result means that `a`
// precedes `b`.
#define QSBR_LT(a, b) ((int64_t)((uint64_t)(a) - (uint64_t)(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((uint64_t)(a) - (uint64_t)(b)) <= 0)
|
|
|
|
// Forward declarations: only pointers to these types are needed here.
struct _qsbr_shared;
struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
|
|
|
|
// Per-thread state
|
|
struct _qsbr_thread_state {
|
|
// Last observed write sequence (or 0 if detached)
|
|
uint64_t seq;
|
|
|
|
// Shared (per-interpreter) QSBR state
|
|
struct _qsbr_shared *shared;
|
|
|
|
// Thread state (or NULL)
|
|
PyThreadState *tstate;
|
|
|
|
// Number of held items added by this thread since the last write sequence
|
|
// advance
|
|
int deferred_count;
|
|
|
|
// Estimate for the amount of memory that is held by this thread since
|
|
// the last write sequence advance
|
|
size_t deferred_memory;
|
|
|
|
// Amount of memory in mimalloc pages deferred from collection. When
|
|
// deferred, they are prevented from being used for a different size class
|
|
// and in a different thread.
|
|
size_t deferred_page_memory;
|
|
|
|
// True if the deferred memory frees should be processed.
|
|
bool should_process;
|
|
|
|
// Is this thread state allocated?
|
|
bool allocated;
|
|
struct _qsbr_thread_state *freelist_next;
|
|
};
|
|
|
|
// Padding to avoid false sharing
|
|
struct _qsbr_pad {
|
|
struct _qsbr_thread_state qsbr;
|
|
char __padding[64 - sizeof(struct _qsbr_thread_state)];
|
|
};
|
|
|
|
// Per-interpreter state
|
|
struct _qsbr_shared {
|
|
// Write sequence: always odd, incremented by two
|
|
uint64_t wr_seq;
|
|
|
|
// Minimum observed read sequence of all QSBR thread states
|
|
uint64_t rd_seq;
|
|
|
|
// Array of QSBR thread states.
|
|
struct _qsbr_pad *array;
|
|
Py_ssize_t size;
|
|
|
|
// Freelist of unused _qsbr_thread_states (protected by mutex)
|
|
PyMutex mutex;
|
|
struct _qsbr_thread_state *freelist;
|
|
};
|
|
|
|
static inline uint64_t
|
|
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
|
|
{
|
|
return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
|
|
}
|
|
|
|
// Reports a quiescent state: the caller no longer holds any pointer to shared
|
|
// data not protected by locks or reference counts.
|
|
static inline void
|
|
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
|
|
{
|
|
uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
|
|
_Py_atomic_store_uint64_release(&qsbr->seq, seq);
|
|
}
|
|
|
|
// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
|
|
// but does not perform a scan of threads.
|
|
static inline bool
|
|
_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
|
|
{
|
|
uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
|
|
return QSBR_LEQ(goal, rd_seq);
|
|
}
|
|
|
|
// Advance the write sequence and return the new goal. This should be called
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
// determine when it is safe to reclaim (free) the memory.
extern uint64_t
_Py_qsbr_advance(struct _qsbr_shared *shared);
|
|
|
|
// Return the next value for the write sequence (current plus the increment).
extern uint64_t
_Py_qsbr_shared_next(struct _qsbr_shared *shared);
|
|
|
|
// Return true if deferred memory frees held by QSBR should be processed to
|
|
// determine if they can be safely freed.
|
|
static inline bool
|
|
_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
|
|
{
|
|
return qsbr->should_process;
|
|
}
|
|
|
|
// Have the read sequences advanced to the given goal? If this returns true,
// it is safe to reclaim any memory tagged with the goal (or an earlier goal).
extern bool
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
|
|
|
|
// Called when a thread attaches to the interpreter.
extern void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);
|
|
|
|
// Called when a thread detaches from the interpreter.
extern void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);
|
|
|
|
// Reserves (allocates) a QSBR state and returns its index.
|
|
extern Py_ssize_t
|
|
_Py_qsbr_reserve(PyInterpreterState *interp);
|
|
|
|
// Associates a PyThreadState with the QSBR state at the given index
|
|
extern void
|
|
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
|
|
PyInterpreterState *interp, Py_ssize_t index);
|
|
|
|
// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
|
|
extern void
|
|
_Py_qsbr_unregister(PyThreadState *tstate);
|
|
|
|
extern void
|
|
_Py_qsbr_fini(PyInterpreterState *interp);
|
|
|
|
// NOTE(review): undocumented in this header; presumably resets QSBR state in
// the child process after fork() for the given (surviving) thread -- confirm
// against the implementation.
extern void
_Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
#endif /* !Py_INTERNAL_QSBR_H */
|