gh-91048: Refactor and optimize remote debugging module (#134652)

Completely refactor Modules/_remote_debugging_module.c with improved
code organization, replacing scattered reference counting and error
handling with centralized goto error paths. This cleanup improves
maintainability and reduces code duplication throughout the module while
preserving the same external API.

Implement memory page caching optimization in Python/remote_debug.h to
avoid repeated reads of the same memory regions during debugging
operations. The cache stores previously read memory pages and reuses
them for subsequent reads, significantly reducing system calls and
improving performance.

Add code object caching mechanism with a new code_object_generation
field in the interpreter state that tracks when code object caches need
invalidation. This allows efficient reuse of parsed code object metadata
and eliminates redundant processing of the same code objects across
debugging sessions.

Optimize memory operations by replacing multiple individual structure
copies with single bulk reads for the same data structures. This reduces
the number of memory operations and system calls required to gather
debugging information from the target process.

Update Makefile.pre.in to include Python/remote_debug.h in the headers
list, ensuring that changes to the remote debugging header force proper
recompilation of dependent modules and maintain build consistency across
the codebase.

Also, make the module compatible with the free threading build as an extra :)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
Pablo Galindo Salgado 2025-05-25 21:19:29 +01:00 committed by GitHub
parent 328a778db8
commit 42b25ad4d3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 2413 additions and 1081 deletions

View file

@ -54,11 +54,13 @@ extern "C" {
# define _Py_Debug_Free_Threaded 1
# define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc)
# define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index)
# define _Py_Debug_interpreter_state_tlbc_generation offsetof(PyInterpreterState, tlbc_indices.tlbc_generation)
#else
# define _Py_Debug_gilruntimestate_enabled 0
# define _Py_Debug_Free_Threaded 0
# define _Py_Debug_code_object_co_tlbc 0
# define _Py_Debug_interpreter_frame_tlbc_index 0
# define _Py_Debug_interpreter_state_tlbc_generation 0
#endif
@ -89,6 +91,8 @@ typedef struct _Py_DebugOffsets {
uint64_t gil_runtime_state_enabled;
uint64_t gil_runtime_state_locked;
uint64_t gil_runtime_state_holder;
uint64_t code_object_generation;
uint64_t tlbc_generation;
} interpreter_state;
// Thread state offset;
@ -216,6 +220,11 @@ typedef struct _Py_DebugOffsets {
uint64_t gi_frame_state;
} gen_object;
struct _llist_node {
uint64_t next;
uint64_t prev;
} llist_node;
struct _debugger_support {
uint64_t eval_breaker;
uint64_t remote_debugger_support;
@ -251,6 +260,8 @@ typedef struct _Py_DebugOffsets {
.gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \
.gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \
.gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \
.code_object_generation = offsetof(PyInterpreterState, _code_object_generation), \
.tlbc_generation = _Py_Debug_interpreter_state_tlbc_generation, \
}, \
.thread_state = { \
.size = sizeof(PyThreadState), \
@ -347,6 +358,10 @@ typedef struct _Py_DebugOffsets {
.gi_iframe = offsetof(PyGenObject, gi_iframe), \
.gi_frame_state = offsetof(PyGenObject, gi_frame_state), \
}, \
.llist_node = { \
.next = offsetof(struct llist_node, next), \
.prev = offsetof(struct llist_node, prev), \
}, \
.debugger_support = { \
.eval_breaker = offsetof(PyThreadState, eval_breaker), \
.remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \

View file

@ -795,6 +795,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append));

View file

@ -286,6 +286,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(alias)
STRUCT_FOR_ID(align)
STRUCT_FOR_ID(all)
STRUCT_FOR_ID(all_threads)
STRUCT_FOR_ID(allow_code)
STRUCT_FOR_ID(any)
STRUCT_FOR_ID(append)

View file

@ -726,6 +726,10 @@ typedef struct _PyIndexPool {
// Next index to allocate if no free indices are available
int32_t next_index;
// Generation counter incremented on thread creation/destruction
// Used for TLBC cache invalidation in remote debugging
uint32_t tlbc_generation;
} _PyIndexPool;
typedef union _Py_unique_id_entry {
@ -843,6 +847,8 @@ struct _is {
/* The per-interpreter GIL, which might not be used. */
struct _gil_runtime_state _gil;
uint64_t _code_object_generation;
/* ---------- IMPORTANT ---------------------------
The fields above this line are declared as early as
possible to facilitate out-of-process observability

View file

@ -793,6 +793,7 @@ extern "C" {
INIT_ID(alias), \
INIT_ID(align), \
INIT_ID(all), \
INIT_ID(all_threads), \
INIT_ID(allow_code), \
INIT_ID(any), \
INIT_ID(append), \

View file

@ -932,6 +932,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(all_threads);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(allow_code);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));