gh-111968: Use per-thread freelists for generator in free-threading (gh-114189)

This commit is contained in:
Donghee Na 2024-01-19 03:15:00 +09:00 committed by GitHub
parent 2d3f6b56c5
commit 7fa511ba57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 48 additions and 74 deletions

View file

@ -19,12 +19,14 @@ extern "C" {
# define PyList_MAXFREELIST 80 # define PyList_MAXFREELIST 80
# define PyFloat_MAXFREELIST 100 # define PyFloat_MAXFREELIST 100
# define PyContext_MAXFREELIST 255 # define PyContext_MAXFREELIST 255
# define _PyAsyncGen_MAXFREELIST 80
#else #else
# define PyTuple_NFREELISTS 0 # define PyTuple_NFREELISTS 0
# define PyTuple_MAXFREELIST 0 # define PyTuple_MAXFREELIST 0
# define PyList_MAXFREELIST 0 # define PyList_MAXFREELIST 0
# define PyFloat_MAXFREELIST 0 # define PyFloat_MAXFREELIST 0
# define PyContext_MAXFREELIST 0 # define PyContext_MAXFREELIST 0
# define _PyAsyncGen_MAXFREELIST 0
#endif #endif
struct _Py_list_state { struct _Py_list_state {
@ -77,12 +79,27 @@ struct _Py_context_state {
#endif #endif
}; };
/* Free-list state for the async generator helper objects.  It is embedded
   in _PyFreeListState as the async_gen_state member.  The fields exist
   only when WITH_FREELISTS is defined; otherwise the struct is empty. */
struct _Py_async_gen_state {
#ifdef WITH_FREELISTS
/* Freelists boost performance 6-10%; they also reduce memory
fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend
are short-lived objects that are instantiated for every
__anext__() call. */
/* Cached _PyAsyncGenWrappedValue objects, used as a stack: deallocation
   stores at index value_numfree and then increments it, allocation
   decrements it and reuses the entry at the new index. */
struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of valid entries in value_freelist.  _PyAsyncGen_ClearFreeLists()
   sets it to -1 when called with is_finalization set; the dealloc path
   only caches objects while the counter is >= 0. */
int value_numfree;
/* Cached PyAsyncGenASend objects, used as a stack in the same way as
   value_freelist. */
struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of valid entries in asend_freelist; set to -1 on finalization,
   like value_numfree. */
int asend_numfree;
#endif
};
typedef struct _Py_freelist_state { typedef struct _Py_freelist_state {
struct _Py_float_state float_state; struct _Py_float_state float_state;
struct _Py_tuple_state tuple_state; struct _Py_tuple_state tuple_state;
struct _Py_list_state list_state; struct _Py_list_state list_state;
struct _Py_slice_state slice_state; struct _Py_slice_state slice_state;
struct _Py_context_state context_state; struct _Py_context_state context_state;
struct _Py_async_gen_state async_gen_state;
} _PyFreeListState; } _PyFreeListState;
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -251,7 +251,7 @@ extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization)
extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _PySlice_ClearCache(_PyFreeListState *state); extern void _PySlice_ClearCache(_PyFreeListState *state);
extern void _PyDict_ClearFreeList(PyInterpreterState *interp); extern void _PyDict_ClearFreeList(PyInterpreterState *interp);
extern void _PyAsyncGen_ClearFreeLists(PyInterpreterState *interp); extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization);
extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _Py_ScheduleGC(PyInterpreterState *interp); extern void _Py_ScheduleGC(PyInterpreterState *interp);
extern void _Py_RunGC(PyThreadState *tstate); extern void _Py_RunGC(PyThreadState *tstate);

View file

@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define" # error "this header requires Py_BUILD_CORE define"
#endif #endif
#include "pycore_freelist.h"
extern PyObject *_PyGen_yf(PyGenObject *); extern PyObject *_PyGen_yf(PyGenObject *);
extern void _PyGen_Finalize(PyObject *self); extern void _PyGen_Finalize(PyObject *self);
@ -26,34 +28,7 @@ extern PyTypeObject _PyAsyncGenAThrow_Type;
/* runtime lifecycle */ /* runtime lifecycle */
extern void _PyAsyncGen_Fini(PyInterpreterState *); extern void _PyAsyncGen_Fini(_PyFreeListState *);
/* other API */
#ifndef WITH_FREELISTS
// without freelists
# define _PyAsyncGen_MAXFREELIST 0
#endif
#ifndef _PyAsyncGen_MAXFREELIST
# define _PyAsyncGen_MAXFREELIST 80
#endif
/* Free-list state for the async generator helper objects, stored in the
   interpreter state (struct _is) as the async_gen member.  The fields exist
   only when _PyAsyncGen_MAXFREELIST is greater than 0; the macro is defined
   as 0 when WITH_FREELISTS is not defined and defaults to 80 otherwise. */
struct _Py_async_gen_state {
#if _PyAsyncGen_MAXFREELIST > 0
/* Freelists boost performance 6-10%; they also reduce memory
fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend
are short-lived objects that are instantiated for every
__anext__() call. */
/* Cached _PyAsyncGenWrappedValue objects; entries below value_numfree
   are valid. */
struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of valid entries in value_freelist.  In Py_DEBUG builds
   _PyAsyncGen_Fini() sets it to -1 so later use can be caught by an
   assert. */
int value_numfree;
/* Cached PyAsyncGenASend objects; entries below asend_numfree are valid. */
struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of valid entries in asend_freelist; handled like value_numfree. */
int asend_numfree;
#endif
};
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -190,7 +190,6 @@ struct _is {
struct _Py_tuple_state tuple; struct _Py_tuple_state tuple;
struct _Py_dict_state dict_state; struct _Py_dict_state dict_state;
struct _Py_async_gen_state async_gen;
struct _Py_exc_state exc_state; struct _Py_exc_state exc_state;
struct ast_state ast; struct ast_state ast;

View file

@ -1628,12 +1628,12 @@ PyTypeObject PyAsyncGen_Type = {
}; };
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
static struct _Py_async_gen_state * static struct _Py_async_gen_state *
get_async_gen_state(void) get_async_gen_state(void)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET(); _PyFreeListState *state = _PyFreeListState_GET();
return &interp->async_gen; return &state->async_gen_state;
} }
#endif #endif
@ -1656,36 +1656,36 @@ PyAsyncGen_New(PyFrameObject *f, PyObject *name, PyObject *qualname)
void void
_PyAsyncGen_ClearFreeLists(PyInterpreterState *interp) _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization)
{ {
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = &interp->async_gen; struct _Py_async_gen_state *state = &freelist_state->async_gen_state;
while (state->value_numfree) { while (state->value_numfree > 0) {
_PyAsyncGenWrappedValue *o; _PyAsyncGenWrappedValue *o;
o = state->value_freelist[--state->value_numfree]; o = state->value_freelist[--state->value_numfree];
assert(_PyAsyncGenWrappedValue_CheckExact(o)); assert(_PyAsyncGenWrappedValue_CheckExact(o));
PyObject_GC_Del(o); PyObject_GC_Del(o);
} }
while (state->asend_numfree) { while (state->asend_numfree > 0) {
PyAsyncGenASend *o; PyAsyncGenASend *o;
o = state->asend_freelist[--state->asend_numfree]; o = state->asend_freelist[--state->asend_numfree];
assert(Py_IS_TYPE(o, &_PyAsyncGenASend_Type)); assert(Py_IS_TYPE(o, &_PyAsyncGenASend_Type));
PyObject_GC_Del(o); PyObject_GC_Del(o);
} }
if (is_finalization) {
state->value_numfree = -1;
state->asend_numfree = -1;
}
#endif #endif
} }
void void
_PyAsyncGen_Fini(PyInterpreterState *interp) _PyAsyncGen_Fini(_PyFreeListState *state)
{ {
_PyAsyncGen_ClearFreeLists(interp); _PyAsyncGen_ClearFreeLists(state, 1);
#if defined(Py_DEBUG) && _PyAsyncGen_MAXFREELIST > 0
struct _Py_async_gen_state *state = &interp->async_gen;
state->value_numfree = -1;
state->asend_numfree = -1;
#endif
} }
@ -1732,13 +1732,9 @@ async_gen_asend_dealloc(PyAsyncGenASend *o)
_PyObject_GC_UNTRACK((PyObject *)o); _PyObject_GC_UNTRACK((PyObject *)o);
Py_CLEAR(o->ags_gen); Py_CLEAR(o->ags_gen);
Py_CLEAR(o->ags_sendval); Py_CLEAR(o->ags_sendval);
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state(); struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG if (state->asend_numfree >= 0 && state->asend_numfree < _PyAsyncGen_MAXFREELIST) {
// async_gen_asend_dealloc() must not be called after _PyAsyncGen_Fini()
assert(state->asend_numfree != -1);
#endif
if (state->asend_numfree < _PyAsyncGen_MAXFREELIST) {
assert(PyAsyncGenASend_CheckExact(o)); assert(PyAsyncGenASend_CheckExact(o));
_PyGC_CLEAR_FINALIZED((PyObject *)o); _PyGC_CLEAR_FINALIZED((PyObject *)o);
state->asend_freelist[state->asend_numfree++] = o; state->asend_freelist[state->asend_numfree++] = o;
@ -1906,13 +1902,9 @@ static PyObject *
async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval) async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval)
{ {
PyAsyncGenASend *o; PyAsyncGenASend *o;
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state(); struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG if (state->asend_numfree > 0) {
// async_gen_asend_new() must not be called after _PyAsyncGen_Fini()
assert(state->asend_numfree != -1);
#endif
if (state->asend_numfree) {
state->asend_numfree--; state->asend_numfree--;
o = state->asend_freelist[state->asend_numfree]; o = state->asend_freelist[state->asend_numfree];
_Py_NewReference((PyObject *)o); _Py_NewReference((PyObject *)o);
@ -1945,13 +1937,9 @@ async_gen_wrapped_val_dealloc(_PyAsyncGenWrappedValue *o)
{ {
_PyObject_GC_UNTRACK((PyObject *)o); _PyObject_GC_UNTRACK((PyObject *)o);
Py_CLEAR(o->agw_val); Py_CLEAR(o->agw_val);
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state(); struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG if (state->value_numfree >= 0 && state->value_numfree < _PyAsyncGen_MAXFREELIST) {
// async_gen_wrapped_val_dealloc() must not be called after _PyAsyncGen_Fini()
assert(state->value_numfree != -1);
#endif
if (state->value_numfree < _PyAsyncGen_MAXFREELIST) {
assert(_PyAsyncGenWrappedValue_CheckExact(o)); assert(_PyAsyncGenWrappedValue_CheckExact(o));
state->value_freelist[state->value_numfree++] = o; state->value_freelist[state->value_numfree++] = o;
OBJECT_STAT_INC(to_freelist); OBJECT_STAT_INC(to_freelist);
@ -2022,13 +2010,9 @@ _PyAsyncGenValueWrapperNew(PyThreadState *tstate, PyObject *val)
_PyAsyncGenWrappedValue *o; _PyAsyncGenWrappedValue *o;
assert(val); assert(val);
#if _PyAsyncGen_MAXFREELIST > 0 #ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = &tstate->interp->async_gen; struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG if (state->value_numfree > 0) {
// _PyAsyncGenValueWrapperNew() must not be called after _PyAsyncGen_Fini()
assert(state->value_numfree != -1);
#endif
if (state->value_numfree) {
state->value_numfree--; state->value_numfree--;
o = state->value_freelist[state->value_numfree]; o = state->value_freelist[state->value_numfree];
OBJECT_STAT_INC(from_freelist); OBJECT_STAT_INC(from_freelist);

View file

@ -15,7 +15,6 @@ void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp) _PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{ {
_PyDict_ClearFreeList(interp); _PyDict_ClearFreeList(interp);
_PyAsyncGen_ClearFreeLists(interp);
HEAD_LOCK(&_PyRuntime); HEAD_LOCK(&_PyRuntime);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;

View file

@ -12,7 +12,6 @@ void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp) _PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{ {
_PyDict_ClearFreeList(interp); _PyDict_ClearFreeList(interp);
_PyAsyncGen_ClearFreeLists(interp);
_Py_ClearFreeLists(&interp->freelist_state, 0); _Py_ClearFreeLists(&interp->freelist_state, 0);
} }

View file

@ -1735,7 +1735,6 @@ finalize_interp_types(PyInterpreterState *interp)
_PySys_FiniTypes(interp); _PySys_FiniTypes(interp);
_PyXI_FiniTypes(interp); _PyXI_FiniTypes(interp);
_PyExc_Fini(interp); _PyExc_Fini(interp);
_PyAsyncGen_Fini(interp);
_PyFloat_FiniType(interp); _PyFloat_FiniType(interp);
_PyLong_FiniTypes(interp); _PyLong_FiniTypes(interp);
_PyThread_FiniType(interp); _PyThread_FiniType(interp);
@ -1759,6 +1758,7 @@ finalize_interp_types(PyInterpreterState *interp)
_PyFloat_Fini(state); _PyFloat_Fini(state);
_PySlice_Fini(state); _PySlice_Fini(state);
_PyContext_Fini(state); _PyContext_Fini(state);
_PyAsyncGen_Fini(state);
#ifdef Py_DEBUG #ifdef Py_DEBUG
_PyStaticObjects_CheckRefcnt(interp); _PyStaticObjects_CheckRefcnt(interp);

View file

@ -1462,6 +1462,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization)
_PyTuple_ClearFreeList(state, is_finalization); _PyTuple_ClearFreeList(state, is_finalization);
_PyList_ClearFreeList(state, is_finalization); _PyList_ClearFreeList(state, is_finalization);
_PyContext_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization);
_PyAsyncGen_ClearFreeLists(state, is_finalization);
} }
void void
@ -1549,7 +1550,7 @@ PyThreadState_Clear(PyThreadState *tstate)
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
// Each thread should clear own freelists in free-threading builds. // Each thread should clear own freelists in free-threading builds.
_PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state; _PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state;
_Py_ClearFreeLists(freelist_state, 0); _Py_ClearFreeLists(freelist_state, 1);
_PySlice_ClearCache(freelist_state); _PySlice_ClearCache(freelist_state);
#endif #endif