gh-111968: Use per-thread freelists for dict in free-threading (gh-114323)

This commit is contained in:
Donghee Na 2024-02-02 05:53:53 +09:00 committed by GitHub
parent 587d480203
commit 13907968d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 75 additions and 75 deletions

View file

@ -9,6 +9,7 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define" # error "this header requires Py_BUILD_CORE define"
#endif #endif
#include "pycore_freelist.h" // _PyFreeListState
#include "pycore_identifier.h" // _Py_Identifier #include "pycore_identifier.h" // _Py_Identifier
#include "pycore_object.h" // PyDictOrValues #include "pycore_object.h" // PyDictOrValues
@ -69,7 +70,7 @@ extern PyObject* _PyDictView_Intersect(PyObject* self, PyObject *other);
/* runtime lifecycle */ /* runtime lifecycle */
extern void _PyDict_Fini(PyInterpreterState *interp); extern void _PyDict_Fini(PyInterpreterState *state);
/* other API */ /* other API */

View file

@ -8,16 +8,6 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define" # error "this header requires Py_BUILD_CORE define"
#endif #endif
#ifndef WITH_FREELISTS
// without freelists
# define PyDict_MAXFREELIST 0
#endif
#ifndef PyDict_MAXFREELIST
# define PyDict_MAXFREELIST 80
#endif
#define DICT_MAX_WATCHERS 8 #define DICT_MAX_WATCHERS 8
struct _Py_dict_state { struct _Py_dict_state {
@ -26,15 +16,6 @@ struct _Py_dict_state {
* time that a dictionary is modified. */ * time that a dictionary is modified. */
uint64_t global_version; uint64_t global_version;
uint32_t next_keys_version; uint32_t next_keys_version;
#if PyDict_MAXFREELIST > 0
/* Dictionary reuse scheme to save calls to malloc and free */
PyDictObject *free_list[PyDict_MAXFREELIST];
PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
int numfree;
int keys_numfree;
#endif
PyDict_WatchCallback watchers[DICT_MAX_WATCHERS]; PyDict_WatchCallback watchers[DICT_MAX_WATCHERS];
}; };

View file

@ -17,6 +17,7 @@ extern "C" {
# define PyTuple_NFREELISTS PyTuple_MAXSAVESIZE # define PyTuple_NFREELISTS PyTuple_MAXSAVESIZE
# define PyTuple_MAXFREELIST 2000 # define PyTuple_MAXFREELIST 2000
# define PyList_MAXFREELIST 80 # define PyList_MAXFREELIST 80
# define PyDict_MAXFREELIST 80
# define PyFloat_MAXFREELIST 100 # define PyFloat_MAXFREELIST 100
# define PyContext_MAXFREELIST 255 # define PyContext_MAXFREELIST 255
# define _PyAsyncGen_MAXFREELIST 80 # define _PyAsyncGen_MAXFREELIST 80
@ -25,6 +26,7 @@ extern "C" {
# define PyTuple_NFREELISTS 0 # define PyTuple_NFREELISTS 0
# define PyTuple_MAXFREELIST 0 # define PyTuple_MAXFREELIST 0
# define PyList_MAXFREELIST 0 # define PyList_MAXFREELIST 0
# define PyDict_MAXFREELIST 0
# define PyFloat_MAXFREELIST 0 # define PyFloat_MAXFREELIST 0
# define PyContext_MAXFREELIST 0 # define PyContext_MAXFREELIST 0
# define _PyAsyncGen_MAXFREELIST 0 # define _PyAsyncGen_MAXFREELIST 0
@ -65,6 +67,16 @@ struct _Py_float_state {
#endif #endif
}; };
/* Freelist storage for dict objects and dict keys objects.
 *
 * An instance of this struct is embedded as the `dicts` member of
 * struct _Py_freelist_state, and dictobject.c reaches it through
 * _PyFreeListState_GET().  The fields are only present when
 * WITH_FREELISTS is defined.
 */
struct _Py_dict_freelist {
#ifdef WITH_FREELISTS
/* Dictionary reuse scheme to save calls to malloc and free */
/* Deallocated exact-dict objects kept for reuse: dict_dealloc() pushes
 * onto this array while there is room, and new_dict() pops from it. */
PyDictObject *free_list[PyDict_MAXFREELIST];
/* Freed keys objects kept for reuse.  Only unicode keys objects whose
 * log2 size equals PyDict_LOG_MINSIZE are stored by free_keys_object(),
 * and new_keys_object() only pops for that same shape. */
PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
/* Number of valid entries in free_list.  _PyDict_ClearFreeList() sets
 * it to -1 when called with is_finalization set; dict_dealloc() checks
 * for `>= 0` before pushing, so nothing is cached after that point. */
int numfree;
/* Number of valid entries in keys_free_list.  Uses the same -1
 * "finalized" marker as numfree, checked by free_keys_object(). */
int keys_numfree;
#endif
};
struct _Py_slice_state { struct _Py_slice_state {
#ifdef WITH_FREELISTS #ifdef WITH_FREELISTS
/* Using a cache is very effective since typically only a single slice is /* Using a cache is very effective since typically only a single slice is
@ -106,6 +118,7 @@ typedef struct _Py_freelist_state {
struct _Py_float_state floats; struct _Py_float_state floats;
struct _Py_tuple_state tuples; struct _Py_tuple_state tuples;
struct _Py_list_state lists; struct _Py_list_state lists;
struct _Py_dict_freelist dicts;
struct _Py_slice_state slices; struct _Py_slice_state slices;
struct _Py_context_state contexts; struct _Py_context_state contexts;
struct _Py_async_gen_state async_gens; struct _Py_async_gen_state async_gens;

View file

@ -267,7 +267,7 @@ extern void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization)
extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _PySlice_ClearCache(_PyFreeListState *state); extern void _PySlice_ClearCache(_PyFreeListState *state);
extern void _PyDict_ClearFreeList(PyInterpreterState *interp); extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization);
extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _Py_ScheduleGC(PyInterpreterState *interp); extern void _Py_ScheduleGC(PyInterpreterState *interp);

View file

@ -20,6 +20,7 @@ extern "C" {
#include "pycore_dtoa.h" // struct _dtoa_state #include "pycore_dtoa.h" // struct _dtoa_state
#include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state #include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_freelist.h" // struct _Py_freelist_state
#include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_gc.h" // struct _gc_runtime_state #include "pycore_gc.h" // struct _gc_runtime_state
#include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_genobject.h" // struct _Py_async_gen_state
@ -230,7 +231,6 @@ struct _is {
struct _dtoa_state dtoa; struct _dtoa_state dtoa;
struct _py_func_state func_state; struct _py_func_state func_state;
struct _Py_tuple_state tuple;
struct _Py_dict_state dict_state; struct _Py_dict_state dict_state;
struct _Py_exc_state exc_state; struct _Py_exc_state exc_state;

View file

@ -118,6 +118,7 @@ As a consequence of this, split keys have a maximum size of 16.
#include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_ceval.h" // _PyEval_GetBuiltin()
#include "pycore_code.h" // stats #include "pycore_code.h" // stats
#include "pycore_dict.h" // export _PyDict_SizeOf() #include "pycore_dict.h" // export _PyDict_SizeOf()
#include "pycore_freelist.h" // _PyFreeListState_GET()
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() #include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats()
#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException()
@ -242,40 +243,44 @@ static PyObject* dict_iter(PyObject *dict);
#include "clinic/dictobject.c.h" #include "clinic/dictobject.c.h"
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
static struct _Py_dict_state * static struct _Py_dict_freelist *
get_dict_state(PyInterpreterState *interp) get_dict_state(void)
{ {
return &interp->dict_state; _PyFreeListState *state = _PyFreeListState_GET();
return &state->dicts;
} }
#endif #endif
void void
_PyDict_ClearFreeList(PyInterpreterState *interp) _PyDict_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization)
{ {
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_dict_state *state = &interp->dict_state; struct _Py_dict_freelist *state = &freelist_state->dicts;
while (state->numfree) { while (state->numfree > 0) {
PyDictObject *op = state->free_list[--state->numfree]; PyDictObject *op = state->free_list[--state->numfree];
assert(PyDict_CheckExact(op)); assert(PyDict_CheckExact(op));
PyObject_GC_Del(op); PyObject_GC_Del(op);
} }
while (state->keys_numfree) { while (state->keys_numfree > 0) {
PyMem_Free(state->keys_free_list[--state->keys_numfree]); PyMem_Free(state->keys_free_list[--state->keys_numfree]);
} }
if (is_finalization) {
state->numfree = -1;
state->keys_numfree = -1;
}
#endif #endif
} }
void void
_PyDict_Fini(PyInterpreterState *interp) _PyDict_Fini(PyInterpreterState *Py_UNUSED(interp))
{ {
_PyDict_ClearFreeList(interp); // With Py_GIL_DISABLED:
#if defined(Py_DEBUG) && PyDict_MAXFREELIST > 0 // the freelists for the current thread state have already been cleared.
struct _Py_dict_state *state = &interp->dict_state; #ifndef Py_GIL_DISABLED
state->numfree = -1; _PyFreeListState *state = _PyFreeListState_GET();
state->keys_numfree = -1; _PyDict_ClearFreeList(state, 1);
#endif #endif
} }
@ -290,9 +295,8 @@ unicode_get_hash(PyObject *o)
void void
_PyDict_DebugMallocStats(FILE *out) _PyDict_DebugMallocStats(FILE *out)
{ {
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_dict_freelist *state = get_dict_state();
struct _Py_dict_state *state = get_dict_state(interp);
_PyDebugAllocatorStats(out, "free PyDictObject", _PyDebugAllocatorStats(out, "free PyDictObject",
state->numfree, sizeof(PyDictObject)); state->numfree, sizeof(PyDictObject));
#endif #endif
@ -300,7 +304,7 @@ _PyDict_DebugMallocStats(FILE *out)
#define DK_MASK(dk) (DK_SIZE(dk)-1) #define DK_MASK(dk) (DK_SIZE(dk)-1)
static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys); static void free_keys_object(PyDictKeysObject *keys);
/* PyDictKeysObject has refcounts like PyObject does, so we have the /* PyDictKeysObject has refcounts like PyObject does, so we have the
following two functions to mirror what Py_INCREF() and Py_DECREF() do. following two functions to mirror what Py_INCREF() and Py_DECREF() do.
@ -348,7 +352,7 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk)
Py_XDECREF(entries[i].me_value); Py_XDECREF(entries[i].me_value);
} }
} }
free_keys_object(interp, dk); free_keys_object(dk);
} }
} }
@ -643,12 +647,8 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode)
log2_bytes = log2_size + 2; log2_bytes = log2_size + 2;
} }
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_dict_state *state = get_dict_state(interp); struct _Py_dict_freelist *state = get_dict_state();
#ifdef Py_DEBUG
// new_keys_object() must not be called after _PyDict_Fini()
assert(state->keys_numfree != -1);
#endif
if (log2_size == PyDict_LOG_MINSIZE && unicode && state->keys_numfree > 0) { if (log2_size == PyDict_LOG_MINSIZE && unicode && state->keys_numfree > 0) {
dk = state->keys_free_list[--state->keys_numfree]; dk = state->keys_free_list[--state->keys_numfree];
OBJECT_STAT_INC(from_freelist); OBJECT_STAT_INC(from_freelist);
@ -680,16 +680,13 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode)
} }
static void static void
free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) free_keys_object(PyDictKeysObject *keys)
{ {
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_dict_state *state = get_dict_state(interp); struct _Py_dict_freelist *state = get_dict_state();
#ifdef Py_DEBUG
// free_keys_object() must not be called after _PyDict_Fini()
assert(state->keys_numfree != -1);
#endif
if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE
&& state->keys_numfree < PyDict_MAXFREELIST && state->keys_numfree < PyDict_MAXFREELIST
&& state->keys_numfree >= 0
&& DK_IS_UNICODE(keys)) { && DK_IS_UNICODE(keys)) {
state->keys_free_list[state->keys_numfree++] = keys; state->keys_free_list[state->keys_numfree++] = keys;
OBJECT_STAT_INC(to_freelist); OBJECT_STAT_INC(to_freelist);
@ -730,13 +727,9 @@ new_dict(PyInterpreterState *interp,
{ {
PyDictObject *mp; PyDictObject *mp;
assert(keys != NULL); assert(keys != NULL);
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_dict_state *state = get_dict_state(interp); struct _Py_dict_freelist *state = get_dict_state();
#ifdef Py_DEBUG if (state->numfree > 0) {
// new_dict() must not be called after _PyDict_Fini()
assert(state->numfree != -1);
#endif
if (state->numfree) {
mp = state->free_list[--state->numfree]; mp = state->free_list[--state->numfree];
assert (mp != NULL); assert (mp != NULL);
assert (Py_IS_TYPE(mp, &PyDict_Type)); assert (Py_IS_TYPE(mp, &PyDict_Type));
@ -1547,7 +1540,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
#endif #endif
assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
assert(oldkeys->dk_refcnt == 1); assert(oldkeys->dk_refcnt == 1);
free_keys_object(interp, oldkeys); free_keys_object(oldkeys);
} }
} }
@ -2458,13 +2451,10 @@ dict_dealloc(PyObject *self)
assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS); assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS);
dictkeys_decref(interp, keys); dictkeys_decref(interp, keys);
} }
#if PyDict_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_dict_state *state = get_dict_state(interp); struct _Py_dict_freelist *state = get_dict_state();
#ifdef Py_DEBUG if (state->numfree < PyDict_MAXFREELIST && state->numfree >=0 &&
// new_dict() must not be called after _PyDict_Fini() Py_IS_TYPE(mp, &PyDict_Type)) {
assert(state->numfree != -1);
#endif
if (state->numfree < PyDict_MAXFREELIST && Py_IS_TYPE(mp, &PyDict_Type)) {
state->free_list[state->numfree++] = mp; state->free_list[state->numfree++] = mp;
OBJECT_STAT_INC(to_freelist); OBJECT_STAT_INC(to_freelist);
} }

View file

@ -2013,7 +2013,11 @@ _PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization)
void void
_PyFloat_Fini(_PyFreeListState *state) _PyFloat_Fini(_PyFreeListState *state)
{ {
// With Py_GIL_DISABLED:
// the freelists for the current thread state have already been cleared.
#ifndef Py_GIL_DISABLED
_PyFloat_ClearFreeList(state, 1); _PyFloat_ClearFreeList(state, 1);
#endif
} }
void void

View file

@ -1685,7 +1685,11 @@ _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization
void void
_PyAsyncGen_Fini(_PyFreeListState *state) _PyAsyncGen_Fini(_PyFreeListState *state)
{ {
// With Py_GIL_DISABLED:
// the freelists for the current thread state have already been cleared.
#ifndef Py_GIL_DISABLED
_PyAsyncGen_ClearFreeLists(state, 1); _PyAsyncGen_ClearFreeLists(state, 1);
#endif
} }

View file

@ -138,7 +138,11 @@ _PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization)
void void
_PyList_Fini(_PyFreeListState *state) _PyList_Fini(_PyFreeListState *state)
{ {
// With Py_GIL_DISABLED:
// the freelists for the current thread state have already been cleared.
#ifndef Py_GIL_DISABLED
_PyList_ClearFreeList(state, 1); _PyList_ClearFreeList(state, 1);
#endif
} }
/* Print summary info about the state of the optimized allocator */ /* Print summary info about the state of the optimized allocator */

View file

@ -1287,7 +1287,11 @@ _PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization)
void void
_PyContext_Fini(_PyFreeListState *state) _PyContext_Fini(_PyFreeListState *state)
{ {
// With Py_GIL_DISABLED:
// the freelists for the current thread state have already been cleared.
#ifndef Py_GIL_DISABLED
_PyContext_ClearFreeList(state, 1); _PyContext_ClearFreeList(state, 1);
#endif
} }

View file

@ -1676,8 +1676,6 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
void void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp) _PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{ {
_PyDict_ClearFreeList(interp);
HEAD_LOCK(&_PyRuntime); HEAD_LOCK(&_PyRuntime);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;
while (tstate != NULL) { while (tstate != NULL) {

View file

@ -11,8 +11,6 @@
void void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp) _PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{ {
_PyDict_ClearFreeList(interp);
_Py_ClearFreeLists(&interp->freelist_state, 0); _Py_ClearFreeLists(&interp->freelist_state, 0);
} }

View file

@ -1461,9 +1461,12 @@ clear_datastack(PyThreadState *tstate)
void void
_Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization)
{ {
// In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear()
// In the default build, freelists are per-interpreter and cleared in finalize_interp_types()
_PyFloat_ClearFreeList(state, is_finalization); _PyFloat_ClearFreeList(state, is_finalization);
_PyTuple_ClearFreeList(state, is_finalization); _PyTuple_ClearFreeList(state, is_finalization);
_PyList_ClearFreeList(state, is_finalization); _PyList_ClearFreeList(state, is_finalization);
_PyDict_ClearFreeList(state, is_finalization);
_PyContext_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization);
_PyAsyncGen_ClearFreeLists(state, is_finalization); _PyAsyncGen_ClearFreeLists(state, is_finalization);
_PyObjectStackChunk_ClearFreeList(state, is_finalization); _PyObjectStackChunk_ClearFreeList(state, is_finalization);