gh-91048: Refactor and optimize remote debugging module (#134652)

Completely refactor Modules/_remote_debugging_module.c with improved
code organization, replacing scattered reference counting and error
handling with centralized goto error paths. This cleanup improves
maintainability and reduces code duplication throughout the module while
preserving the same external API.

Implement memory page caching optimization in Python/remote_debug.h to
avoid repeated reads of the same memory regions during debugging
operations. The cache stores previously read memory pages and reuses
them for subsequent reads, significantly reducing system calls and
improving performance.

Add code object caching mechanism with a new code_object_generation
field in the interpreter state that tracks when code object caches need
invalidation. This allows efficient reuse of parsed code object metadata
and eliminates redundant processing of the same code objects across
debugging sessions.

Optimize memory operations by replacing multiple individual structure
copies with single bulk reads for the same data structures. This reduces
the number of memory operations and system calls required to gather
debugging information from the target process.

Update Makefile.pre.in to include Python/remote_debug.h in the headers
list, ensuring that changes to the remote debugging header force proper
recompilation of dependent modules and maintain build consistency across
the codebase.

Also, make the module compatible with the free threading build as an extra :)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
Pablo Galindo Salgado 2025-05-25 21:19:29 +01:00 committed by GitHub
parent 328a778db8
commit 42b25ad4d3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 2413 additions and 1081 deletions

View file

@ -172,6 +172,9 @@ _PyIndexPool_AllocIndex(_PyIndexPool *pool)
else {
index = heap_pop(free_indices);
}
pool->tlbc_generation++;
UNLOCK_POOL(pool);
return index;
}
@ -180,6 +183,7 @@ void
_PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index)
{
// Hand `index` back to the pool. The whole update runs between
// LOCK_POOL/UNLOCK_POOL.
LOCK_POOL(pool);
// Bump the generation counter; the allocation path increments the same
// counter, so it changes on every allocate/free.
// NOTE(review): presumably this lets an observer detect that the set of
// indices changed -- confirm against the readers of tlbc_generation.
pool->tlbc_generation++;
// Put the index into the free_indices heap (the allocation path takes
// indices from that heap with heap_pop).
heap_add(&pool->free_indices, index);
UNLOCK_POOL(pool);
}

View file

@ -567,6 +567,7 @@ init_interpreter(PyInterpreterState *interp,
}
interp->sys_profile_initialized = false;
interp->sys_trace_initialized = false;
interp->_code_object_generation = 0;
interp->jit = false;
interp->executor_list_head = NULL;
interp->executor_deletion_list_head = NULL;
@ -777,6 +778,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) {
Py_CLEAR(interp->monitoring_tool_names[t]);
}
interp->_code_object_generation = 0;
#ifdef Py_GIL_DISABLED
interp->tlbc_indices.tlbc_generation = 0;
#endif
PyConfig_Clear(&interp->config);
_PyCodec_Fini(interp);
@ -1346,9 +1351,6 @@ tstate_is_alive(PyThreadState *tstate)
// lifecycle
//----------
/* Minimum size of data stack chunk */
#define DATA_STACK_CHUNK_SIZE (16*1024)
static _PyStackChunk*
allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
{
@ -2897,7 +2899,7 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature
static PyObject **
push_chunk(PyThreadState *tstate, int size)
{
int allocate_size = DATA_STACK_CHUNK_SIZE;
int allocate_size = _PY_DATA_STACK_CHUNK_SIZE;
while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) {
allocate_size *= 2;
}

View file

@ -73,19 +73,71 @@ extern "C" {
# define HAVE_PROCESS_VM_READV 0
#endif
/*
 * Return the size, in bytes, of a memory page on the host system.
 *
 * On Windows the value is SYSTEM_INFO.dwPageSize as reported by
 * GetSystemInfo(); on every other platform it is getpagesize().
 *
 * The system is queried on every call. The previous "if (page_size == 0)"
 * guard tested a freshly zero-initialised automatic variable, so it was
 * always true and never cached anything; it has been dropped. Callers that
 * need the value repeatedly should store it (proc_handle_t.page_size does).
 */
static inline size_t
get_page_size(void)
{
#ifdef MS_WINDOWS
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    return (size_t)si.dwPageSize;
#else
    return (size_t)getpagesize();
#endif
}
// One slot of the remote-memory page cache stored in proc_handle_t.pages.
typedef struct page_cache_entry {
uintptr_t page_addr; // page-aligned base address
// Buffer holding a copy of the remote page; NULL until first use, then
// allocated with PyMem_RawMalloc(page_size) and released by
// _Py_RemoteDebug_FreePageCache().
char *data;
// Non-zero when `data` holds the contents read for `page_addr`;
// _Py_RemoteDebug_ClearCache() resets it to 0 without freeing `data`.
int valid;
// NOTE(review): not referenced by any code visible in this section.
struct page_cache_entry *next;
} page_cache_entry_t;
// Number of slots in the proc_handle_t.pages array.
#define MAX_PAGES 1024
// Define a platform-independent process handle structure
typedef struct {
// Process id of the target; set by _Py_RemoteDebug_InitProcHandle() and
// reset to 0 by _Py_RemoteDebug_CleanupProcHandle().
pid_t pid;
#ifdef MS_WINDOWS
#if defined(__APPLE__)
// Mach task port for `pid`, obtained once via pid_to_task() when the
// handle is initialised and passed to mach_vm_read_overwrite() on reads.
mach_port_t task;
#elif defined(MS_WINDOWS)
// Process handle returned by OpenProcess() when the handle is initialised.
HANDLE hProcess;
#endif
// Fixed-size page cache; every slot starts with data == NULL, valid == 0.
page_cache_entry_t pages[MAX_PAGES];
// Page size in bytes, taken from get_page_size() at initialisation.
Py_ssize_t page_size;
} proc_handle_t;
/*
 * Release every page buffer owned by the handle's cache.
 *
 * Each slot's buffer is passed to PyMem_RawFree() (slots that never
 * allocated one hold NULL), then the slot is reset to the empty state:
 * data == NULL and valid == 0.
 */
static void
_Py_RemoteDebug_FreePageCache(proc_handle_t *handle)
{
    page_cache_entry_t *slot = handle->pages;
    page_cache_entry_t *const end = slot + MAX_PAGES;

    for (; slot != end; slot++) {
        PyMem_RawFree(slot->data);
        slot->data = NULL;
        slot->valid = 0;
    }
}
/*
 * Invalidate every cached page without releasing its buffer.
 *
 * Only the `valid` flag of each slot is cleared; `data` is left in place so
 * the next cached read can refill the existing allocation.
 *
 * NOTE(review): this is defined with external linkage; if the enclosing
 * file is a header included from several translation units, `static` may be
 * needed -- confirm against the callers.
 */
void
_Py_RemoteDebug_ClearCache(proc_handle_t *handle)
{
    for (size_t remaining = MAX_PAGES; remaining-- > 0; ) {
        handle->pages[remaining].valid = 0;
    }
}
#if defined(__APPLE__) && TARGET_OS_OSX
static mach_port_t pid_to_task(pid_t pid);
#endif
// Initialize the process handle
static int
_Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) {
handle->pid = pid;
#ifdef MS_WINDOWS
#if defined(__APPLE__)
handle->task = pid_to_task(handle->pid);
#elif defined(MS_WINDOWS)
handle->hProcess = OpenProcess(
PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION,
FALSE, pid);
@ -94,6 +146,11 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) {
return -1;
}
#endif
handle->page_size = get_page_size();
for (int i = 0; i < MAX_PAGES; i++) {
handle->pages[i].data = NULL;
handle->pages[i].valid = 0;
}
return 0;
}
@ -107,6 +164,7 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) {
}
#endif
handle->pid = 0;
_Py_RemoteDebug_FreePageCache(handle);
}
#if defined(__APPLE__) && TARGET_OS_OSX
@ -755,7 +813,7 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address
#elif defined(__APPLE__) && TARGET_OS_OSX
Py_ssize_t result = -1;
kern_return_t kr = mach_vm_read_overwrite(
pid_to_task(handle->pid),
handle->task,
(mach_vm_address_t)remote_address,
len,
(mach_vm_address_t)dst,
@ -780,6 +838,59 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address
#endif
}
/*
 * Read `size` bytes at remote address `addr` into `out`, serving the request
 * from the handle's page cache when possible.
 *
 * Behaviour:
 *   - A request that does not fit inside a single page bypasses the cache
 *     and is forwarded to _Py_RemoteDebug_ReadRemoteMemory().
 *   - If the page containing `addr` is already cached, the bytes are copied
 *     from the cached buffer.
 *   - Otherwise the first free slot is filled by reading the whole page from
 *     the remote process, and the bytes are copied from it.
 *   - If no slot is free, or the whole-page read fails, the exact range is
 *     read directly (uncached).
 *
 * Returns 0 when the bytes were served from the cache, -1 with MemoryError
 * set if a page buffer could not be allocated, and otherwise whatever
 * _Py_RemoteDebug_ReadRemoteMemory() returns for the direct read.
 *
 * NOTE(review): this is defined with external linkage; if the enclosing
 * file is a header included from several translation units, `static` may be
 * needed -- confirm against the callers.
 */
int
_Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle,
                                      uintptr_t addr,
                                      size_t size,
                                      void *out)
{
    size_t page_size = handle->page_size;
    uintptr_t page_base = addr & ~(page_size - 1);
    size_t offset_in_page = addr - page_base;

    // Does the request stay inside one page? Written as a subtraction so a
    // huge `size` cannot wrap "offset + size" around and slip past the check
    // (which would let memcpy() run past the cached page buffer).
    if (offset_in_page >= page_size || size > page_size - offset_in_page) {
        return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out);
    }

    // Single pass over the cache: return on a hit, and remember the first
    // unused slot in case the page has to be loaded.
    page_cache_entry_t *free_slot = NULL;
    for (int i = 0; i < MAX_PAGES; i++) {
        page_cache_entry_t *entry = &handle->pages[i];
        if (entry->valid) {
            if (entry->page_addr == page_base) {
                memcpy(out, entry->data + offset_in_page, size);
                return 0;
            }
        }
        else if (free_slot == NULL) {
            free_slot = entry;
        }
    }

    if (free_slot != NULL) {
        // Buffers are allocated lazily and kept across invalidations.
        if (free_slot->data == NULL) {
            free_slot->data = PyMem_RawMalloc(page_size);
            if (free_slot->data == NULL) {
                PyErr_NoMemory();
                return -1;
            }
        }
        if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size,
                                             free_slot->data) < 0) {
            // The whole-page read failed; drop that error and try to read
            // just the exact amount requested below.
            PyErr_Clear();
        }
        else {
            free_slot->page_addr = page_base;
            free_slot->valid = 1;
            memcpy(out, free_slot->data + offset_in_page, size);
            return 0;
        }
    }

    // Cache full, or the page could not be read as a whole: uncached read.
    return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out);
}
static int
_Py_RemoteDebug_ReadDebugOffsets(
proc_handle_t *handle,