gh-91048: Refactor common data into context object in Modules/_remote_debugging (#142879)
parent fba4584ffc
commit 77b56eafde
5 changed files with 172 additions and 164 deletions
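The change is a parameter-object refactoring: arguments that always travel together through the frame-walking and code-object-parsing paths are grouped into `FrameWalkContext` and `CodeObjectContext` structs, populated with designated initializers and passed by pointer. Below is a minimal standalone sketch of that pattern, assuming nothing about the module itself; `WalkContext` and `walk_frames` are illustrative names only, not CPython APIs.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical context struct: inputs the walker needs, outputs it fills in. */
typedef struct {
    /* Inputs */
    uintptr_t start_addr;
    uintptr_t stop_addr;     /* 0 means "walk everything" */
    /* Outputs */
    size_t    frames_seen;
    uintptr_t last_addr;
} WalkContext;

/* After the refactor: one pointer instead of a long positional argument list. */
static int
walk_frames(WalkContext *ctx)
{
    uintptr_t addr = ctx->start_addr;
    while (addr != 0 && addr != ctx->stop_addr) {
        ctx->frames_seen++;
        ctx->last_addr = addr;
        addr -= 0x40;           /* stand-in for "read the next frame pointer" */
        if (addr < 0x1000) {    /* stand-in for reaching the base frame */
            addr = 0;
        }
    }
    return 0;
}

int
main(void)
{
    /* Callers set only the fields they care about via designated initializers. */
    WalkContext ctx = {
        .start_addr = 0x2000,
        .stop_addr = 0,
    };
    if (walk_frames(&ctx) < 0) {
        return 1;
    }
    printf("visited %zu frames, last at %#lx\n",
           ctx.frames_seen, (unsigned long)ctx.last_addr);
    return 0;
}

A caller that needs to continue a walk can copy a context and override just the fields that change, which is what `collect_frames_with_cache` does with `continue_ctx` in the diff below.
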
@@ -279,6 +279,35 @@ typedef struct {
size_t count;
} StackChunkList;

/*
 * Context for frame chain traversal operations.
 */
typedef struct {
/* Inputs */
uintptr_t frame_addr; // Starting frame address
uintptr_t base_frame_addr; // Sentinel at bottom (for validation)
uintptr_t gc_frame; // GC frame address (0 if not tracking)
uintptr_t last_profiled_frame; // Last cached frame (0 if no cache)
StackChunkList *chunks; // Pre-copied stack chunks

/* Outputs */
PyObject *frame_info; // List to append FrameInfo objects
uintptr_t *frame_addrs; // Array of visited frame addresses
Py_ssize_t num_addrs; // Count of addresses collected
Py_ssize_t max_addrs; // Capacity of frame_addrs array
uintptr_t last_frame_visited; // Last frame address visited
int stopped_at_cached_frame; // Whether we stopped at cached frame
} FrameWalkContext;

/*
 * Context for code object parsing.
 */
typedef struct {
uintptr_t code_addr; // Code object address in remote process
uintptr_t instruction_pointer; // Current instruction pointer
int32_t tlbc_index; // Thread-local bytecode index (free-threading)
} CodeObjectContext;

/* Function pointer types for iteration callbacks */
typedef int (*thread_processor_func)(
RemoteUnwinderObject *unwinder,

@@ -343,10 +372,7 @@ extern long read_py_long(RemoteUnwinderObject *unwinder, uintptr_t address);
extern int parse_code_object(
RemoteUnwinderObject *unwinder,
PyObject **result,
uintptr_t address,
uintptr_t instruction_pointer,
uintptr_t *previous_frame,
int32_t tlbc_index
const CodeObjectContext *ctx
);

extern PyObject *make_location_info(

@@ -420,17 +446,7 @@ extern void *find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr);

extern int process_frame_chain(
RemoteUnwinderObject *unwinder,
uintptr_t initial_frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
int *stopped_at_cached_frame,
uintptr_t *frame_addrs,
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs,
uintptr_t *out_last_frame_addr
FrameWalkContext *ctx
);

/* Frame cache functions */

@@ -460,12 +476,7 @@ extern int frame_cache_store(

extern int collect_frames_with_cache(
RemoteUnwinderObject *unwinder,
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
FrameWalkContext *ctx,
uint64_t thread_id);

/* ============================================================================

@@ -76,6 +76,7 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit");
return 0; // Invalid size
}
assert(tlbc_size > 0 && tlbc_size <= MAX_TLBC_SIZE);

// Allocate and read the entire TLBC array
size_t array_data_size = tlbc_size * sizeof(void*);

@@ -156,8 +157,11 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
const uint8_t* ptr = (const uint8_t*)(linetable);
uintptr_t addr = 0;
int computed_line = firstlineno; // Running accumulator, separate from output
const size_t MAX_LINETABLE_ENTRIES = 65536;
size_t entry_count = 0;

while (*ptr != '\0') {
while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
entry_count++;
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
size_t length = (first_byte & 7) + 1;

@@ -277,12 +281,9 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *locati
int
parse_code_object(RemoteUnwinderObject *unwinder,
PyObject **result,
uintptr_t address,
uintptr_t instruction_pointer,
uintptr_t *previous_frame,
int32_t tlbc_index)
const CodeObjectContext *ctx)
{
void *key = (void *)address;
void *key = (void *)ctx->code_addr;
CachedCodeMetadata *meta = NULL;
PyObject *func = NULL;
PyObject *file = NULL;

@@ -291,9 +292,9 @@ parse_code_object(RemoteUnwinderObject *unwinder,
#ifdef Py_GIL_DISABLED
// In free threading builds, code object addresses might have the low bit set
// as a flag, so we need to mask it off to get the real address
uintptr_t real_address = address & (~1);
uintptr_t real_address = ctx->code_addr & (~1);
#else
uintptr_t real_address = address;
uintptr_t real_address = ctx->code_addr;
#endif

if (unwinder && unwinder->code_object_cache != NULL) {

@@ -360,12 +361,12 @@ parse_code_object(RemoteUnwinderObject *unwinder,
linetable = NULL;
}

uintptr_t ip = instruction_pointer;
uintptr_t ip = ctx->instruction_pointer;
ptrdiff_t addrq;

#ifdef Py_GIL_DISABLED
// Handle thread-local bytecode (TLBC) in free threading builds
if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
if (ctx->tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
// No TLBC or no unwinder - use main bytecode directly
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
goto done_tlbc;

@@ -383,10 +384,12 @@ parse_code_object(RemoteUnwinderObject *unwinder,
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
}

if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) {
if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
assert(ctx->tlbc_index >= 0);
assert(tlbc_entry->tlbc_array_size > 0);
// Use cached TLBC data
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
uintptr_t tlbc_bytecode_addr = entries[tlbc_index];
uintptr_t tlbc_bytecode_addr = entries[ctx->tlbc_index];

if (tlbc_bytecode_addr != 0) {
// Calculate offset from TLBC bytecode

@@ -401,8 +404,6 @@ parse_code_object(RemoteUnwinderObject *unwinder,
done_tlbc:
#else
// Non-free-threaded build, always use the main bytecode
(void)tlbc_index; // Suppress unused parameter warning
(void)unwinder; // Suppress unused parameter warning
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
#endif
; // Empty statement to avoid C23 extension warning

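Several hunks in this commit also cap loops that follow pointers read out of the target process (the line-table walk above, and the stack-chunk, interpreter, thread and frame chains below), so a corrupted or concurrently mutated chain cannot spin forever. Below is a minimal standalone sketch of that defensive pattern under hypothetical names (`Node`, `count_chain`); unlike most of the loops in the diff, which simply stop walking when the cap is reached, this sketch reports the cap as an error.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for "read the next pointer from the remote process";
 * here it just follows an ordinary in-memory linked list. */
typedef struct Node { uintptr_t next; } Node;

static uintptr_t
read_next(uintptr_t addr)
{
    return ((const Node *)addr)->next;
}

/* Follow a pointer chain without trusting it to terminate: cap the hops,
 * in the spirit of MAX_STACK_CHUNKS / MAX_THREADS / MAX_FRAMES in this diff. */
static int
count_chain(uintptr_t head, size_t *out_len)
{
    const size_t MAX_NODES = 4096;
    size_t count = 0;
    uintptr_t addr = head;
    while (addr != 0 && count < MAX_NODES) {
        count++;
        addr = read_next(addr);
    }
    if (addr != 0) {
        return -1;  /* cap hit: chain longer than plausible, likely corrupt */
    }
    *out_len = count;
    return 0;
}

int
main(void)
{
    Node c = { .next = 0 };
    Node b = { .next = (uintptr_t)&c };
    Node a = { .next = (uintptr_t)&b };
    size_t len = 0;
    if (count_chain((uintptr_t)&a, &len) == 0) {
        printf("chain length: %zu\n", len);
    }
    return 0;
}
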
@@ -44,7 +44,9 @@ frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id)
return NULL;
}
for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) {
assert(i >= 0 && i < FRAME_CACHE_MAX_THREADS);
if (unwinder->frame_cache[i].thread_id == thread_id) {
assert(unwinder->frame_cache[i].num_addrs <= FRAME_CACHE_MAX_FRAMES);
return &unwinder->frame_cache[i];
}
}

@@ -154,6 +156,8 @@ frame_cache_lookup_and_extend(
return 0;
}

assert(entry->num_addrs >= 0 && entry->num_addrs <= FRAME_CACHE_MAX_FRAMES);

// Find the index where last_profiled_frame matches
Py_ssize_t start_idx = -1;
for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {

@@ -166,6 +170,7 @@ frame_cache_lookup_and_extend(
if (start_idx < 0) {
return 0; // Not found
}
assert(start_idx < entry->num_addrs);

Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);

@@ -225,6 +230,7 @@ frame_cache_store(
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
num_addrs = FRAME_CACHE_MAX_FRAMES;
}
assert(num_addrs >= 0 && num_addrs <= FRAME_CACHE_MAX_FRAMES);

FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id);
if (!entry) {

@@ -245,6 +251,8 @@ frame_cache_store(
entry->thread_id = thread_id;
memcpy(entry->addrs, addrs, num_addrs * sizeof(uintptr_t));
entry->num_addrs = num_addrs;
assert(entry->num_addrs == num_addrs);
assert(entry->thread_id == thread_id);

return 1;
}

@@ -88,7 +88,8 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder,
return -1;
}

while (chunk_addr != 0) {
const size_t MAX_STACK_CHUNKS = 4096;
while (chunk_addr != 0 && count < MAX_STACK_CHUNKS) {
// Grow array if needed
if (count >= max_chunks) {
max_chunks *= 2;

@@ -128,6 +129,7 @@ void *
find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
{
for (size_t i = 0; i < chunks->count; ++i) {
assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);

@@ -209,7 +211,13 @@ parse_frame_object(
#endif

*address_of_code_object = code_object;
return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index);

CodeObjectContext code_ctx = {
.code_addr = code_object,
.instruction_pointer = instruction_pointer,
.tlbc_index = tlbc_index,
};
return parse_code_object(unwinder, result, &code_ctx);
}

int

@@ -246,7 +254,12 @@ parse_frame_from_chunks(
}
#endif

return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index);
CodeObjectContext code_ctx = {
.code_addr = code_object,
.instruction_pointer = instruction_pointer,
.tlbc_index = tlbc_index,
};
return parse_code_object(unwinder, result, &code_ctx);
}

/* ============================================================================

@@ -256,105 +269,80 @@ parse_frame_from_chunks(
int
process_frame_chain(
RemoteUnwinderObject *unwinder,
uintptr_t initial_frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
int *stopped_at_cached_frame,
uintptr_t *frame_addrs, // optional: C array to receive frame addresses
Py_ssize_t *num_addrs, // in/out: current count / updated count
Py_ssize_t max_addrs, // max capacity of frame_addrs array
uintptr_t *out_last_frame_addr) // optional: receives last frame address visited
FrameWalkContext *ctx)
{
uintptr_t frame_addr = initial_frame_addr;
uintptr_t frame_addr = ctx->frame_addr;
uintptr_t prev_frame_addr = 0;
uintptr_t last_frame_addr = 0; // Track last frame visited for validation
uintptr_t last_frame_addr = 0;
const size_t MAX_FRAMES = 1024 + 512;
size_t frame_count = 0;
assert(MAX_FRAMES > 0 && MAX_FRAMES < 10000);

// Initialize output parameters
if (stopped_at_cached_frame) {
*stopped_at_cached_frame = 0;
}
if (out_last_frame_addr) {
*out_last_frame_addr = 0;
}
ctx->stopped_at_cached_frame = 0;
ctx->last_frame_visited = 0;

// Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
if (stopped_at_cached_frame) {
*stopped_at_cached_frame = 1;
}
if (ctx->last_profiled_frame != 0 && ctx->frame_addr == ctx->last_profiled_frame) {
ctx->stopped_at_cached_frame = 1;
return 0;
}

while ((void*)frame_addr != NULL) {
// Check if we've reached the cached frame - if so, stop here
if (last_profiled_frame != 0 && frame_addr == last_profiled_frame) {
if (stopped_at_cached_frame) {
*stopped_at_cached_frame = 1;
}
if (ctx->last_profiled_frame != 0 && frame_addr == ctx->last_profiled_frame) {
ctx->stopped_at_cached_frame = 1;
break;
}
PyObject *frame = NULL;
uintptr_t next_frame_addr = 0;
uintptr_t stackpointer = 0;
last_frame_addr = frame_addr; // Remember this frame address
last_frame_addr = frame_addr;

if (++frame_count > MAX_FRAMES) {
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded");
return -1;
}
assert(frame_count <= MAX_FRAMES);

if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, ctx->chunks) < 0) {
PyErr_Clear();
uintptr_t address_of_code_object = 0;
if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) {
if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object, &next_frame_addr) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain");
return -1;
}
}
if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) {
if (frame == NULL && PyList_GET_SIZE(ctx->frame_info) == 0) {
const char *e = "Failed to parse initial frame in chain";
PyErr_SetString(PyExc_RuntimeError, e);
return -1;
}
PyObject *extra_frame = NULL;
// This frame kicked off the current GC collection:
if (unwinder->gc && frame_addr == gc_frame) {
if (unwinder->gc && frame_addr == ctx->gc_frame) {
_Py_DECLARE_STR(gc, "<GC>");
extra_frame = &_Py_STR(gc);
}
// Otherwise, check for native frames to insert:
else if (unwinder->native &&
// We've reached an interpreter trampoline frame:
frame == NULL &&
// Bottommost frame is always native, so skip that one:
next_frame_addr &&
// Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame:
!(unwinder->gc && next_frame_addr == gc_frame))
!(unwinder->gc && next_frame_addr == ctx->gc_frame))
{
_Py_DECLARE_STR(native, "<native>");
extra_frame = &_Py_STR(native);
}
if (extra_frame) {
// Use "~" as file, None as location (synthetic frame), None as opcode
PyObject *extra_frame_info = make_frame_info(
unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None);
if (extra_frame_info == NULL) {
return -1;
}
if (PyList_Append(frame_info, extra_frame_info) < 0) {
if (PyList_Append(ctx->frame_info, extra_frame_info) < 0) {
Py_DECREF(extra_frame_info);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append extra frame");
return -1;
}
// Extra frames use 0 as address (they're synthetic)
if (frame_addrs && *num_addrs < max_addrs) {
frame_addrs[(*num_addrs)++] = 0;
if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) {
assert(ctx->num_addrs >= 0);
ctx->frame_addrs[ctx->num_addrs++] = 0;
}
Py_DECREF(extra_frame_info);
}

@@ -367,14 +355,14 @@ process_frame_chain(
return -1;
}

if (PyList_Append(frame_info, frame) < 0) {
if (PyList_Append(ctx->frame_info, frame) < 0) {
Py_DECREF(frame);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame");
return -1;
}
// Track the address for this frame
if (frame_addrs && *num_addrs < max_addrs) {
frame_addrs[(*num_addrs)++] = frame_addr;
if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) {
assert(ctx->num_addrs >= 0);
ctx->frame_addrs[ctx->num_addrs++] = frame_addr;
}
Py_DECREF(frame);
}

@@ -383,21 +371,14 @@ process_frame_chain(
frame_addr = next_frame_addr;
}

// Validate we reached the base frame (sentinel at bottom of stack)
// Only validate if we walked the full chain (didn't stop at cached frame)
// and base_frame_addr is provided (non-zero)
int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame;
if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) {
if (!ctx->stopped_at_cached_frame && ctx->base_frame_addr != 0 && last_frame_addr != ctx->base_frame_addr) {
PyErr_Format(PyExc_RuntimeError,
"Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
base_frame_addr, last_frame_addr);
ctx->base_frame_addr, last_frame_addr);
return -1;
}

// Set output parameter for caller (needed for cache validation)
if (out_last_frame_addr) {
*out_last_frame_addr = last_frame_addr;
}
ctx->last_frame_visited = last_frame_addr;

return 0;
}

@@ -410,8 +391,11 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
{
uintptr_t current_interp = unwinder->interpreter_addr;
uintptr_t zero = 0;
const size_t MAX_INTERPRETERS = 256;
size_t interp_count = 0;

while (current_interp != 0) {
while (current_interp != 0 && interp_count < MAX_INTERPRETERS) {
interp_count++;
// Get first thread in this interpreter
uintptr_t tstate_addr;
if (_Py_RemoteDebug_PagedReadRemoteMemory(

@@ -425,7 +409,10 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
}

// Iterate all threads in this interpreter
while (tstate_addr != 0) {
const size_t MAX_THREADS_PER_INTERP = 8192;
size_t thread_count = 0;
while (tstate_addr != 0 && thread_count < MAX_THREADS_PER_INTERP) {
thread_count++;
// Clear last_profiled_frame
uintptr_t lpf_addr = tstate_addr + unwinder->debug_offsets.thread_state.last_profiled_frame;
if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,

@@ -468,16 +455,13 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
static int
try_full_cache_hit(
RemoteUnwinderObject *unwinder,
uintptr_t frame_addr,
uintptr_t last_profiled_frame,
uint64_t thread_id,
PyObject *frame_info)
const FrameWalkContext *ctx,
uint64_t thread_id)
{
if (!unwinder->frame_cache || last_profiled_frame == 0) {
if (!unwinder->frame_cache || ctx->last_profiled_frame == 0) {
return 0;
}
// Full hit only if current frame == last profiled frame
if (frame_addr != last_profiled_frame) {
if (ctx->frame_addr != ctx->last_profiled_frame) {
return 0;
}

@@ -486,22 +470,19 @@ try_full_cache_hit(
return 0;
}

// Verify first address matches (sanity check)
if (entry->num_addrs == 0 || entry->addrs[0] != frame_addr) {
if (entry->num_addrs == 0 || entry->addrs[0] != ctx->frame_addr) {
return 0;
}

// Always read the current frame from memory to get updated line number
PyObject *current_frame = NULL;
uintptr_t code_object_addr = 0;
uintptr_t previous_frame = 0;
int parse_result = parse_frame_object(unwinder, &current_frame, frame_addr,
int parse_result = parse_frame_object(unwinder, &current_frame, ctx->frame_addr,
&code_object_addr, &previous_frame);
if (parse_result < 0) {
return -1;
}

// Get cached parent frames first (before modifying frame_info)
Py_ssize_t cached_size = PyList_GET_SIZE(entry->frame_list);
PyObject *parent_slice = NULL;
if (cached_size > 1) {

@@ -512,9 +493,8 @@ try_full_cache_hit(
}
}

// Now safe to modify frame_info - add current frame if valid
if (current_frame != NULL) {
if (PyList_Append(frame_info, current_frame) < 0) {
if (PyList_Append(ctx->frame_info, current_frame) < 0) {
Py_DECREF(current_frame);
Py_XDECREF(parent_slice);
return -1;

@@ -523,10 +503,9 @@ try_full_cache_hit(
STATS_ADD(unwinder, frames_read_from_memory, 1);
}

// Extend with cached parent frames
if (parent_slice) {
Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
int result = PyList_SetSlice(frame_info, cur_size, cur_size, parent_slice);
Py_ssize_t cur_size = PyList_GET_SIZE(ctx->frame_info);
int result = PyList_SetSlice(ctx->frame_info, cur_size, cur_size, parent_slice);
Py_DECREF(parent_slice);
if (result < 0) {
return -1;

@@ -543,72 +522,67 @@ try_full_cache_hit(
int
collect_frames_with_cache(
RemoteUnwinderObject *unwinder,
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
FrameWalkContext *ctx,
uint64_t thread_id)
{
// Fast path: check for full cache hit first (no allocations needed)
int full_hit = try_full_cache_hit(unwinder, frame_addr, last_profiled_frame,
thread_id, frame_info);
int full_hit = try_full_cache_hit(unwinder, ctx, thread_id);
if (full_hit != 0) {
return full_hit < 0 ? -1 : 0; // Either error or success
return full_hit < 0 ? -1 : 0;
}

uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
Py_ssize_t num_addrs = 0;
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
uintptr_t last_frame_visited = 0;
Py_ssize_t frames_before = PyList_GET_SIZE(ctx->frame_info);

int stopped_at_cached = 0;
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
last_profiled_frame, &stopped_at_cached,
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
&last_frame_visited) < 0) {
if (process_frame_chain(unwinder, ctx) < 0) {
return -1;
}

// Track frames read from memory (frames added by process_frame_chain)
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before);
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before);

// If stopped at cached frame, extend with cached continuation (both frames and addresses)
if (stopped_at_cached) {
Py_ssize_t frames_before_cache = PyList_GET_SIZE(frame_info);
int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, last_profiled_frame,
frame_info, addrs, &num_addrs,
FRAME_CACHE_MAX_FRAMES);
if (ctx->stopped_at_cached_frame) {
Py_ssize_t frames_before_cache = PyList_GET_SIZE(ctx->frame_info);
int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, ctx->last_profiled_frame,
ctx->frame_info, ctx->frame_addrs, &ctx->num_addrs,
ctx->max_addrs);
if (cache_result < 0) {
return -1;
}
if (cache_result == 0) {
// Cache miss - continue walking from last_profiled_frame to get the rest
STATS_INC(unwinder, frame_cache_misses);
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
&last_frame_visited) < 0) {
Py_ssize_t frames_before_walk = PyList_GET_SIZE(ctx->frame_info);

FrameWalkContext continue_ctx = {
.frame_addr = ctx->last_profiled_frame,
.base_frame_addr = ctx->base_frame_addr,
.gc_frame = ctx->gc_frame,
.last_profiled_frame = 0,
.chunks = ctx->chunks,
.frame_info = ctx->frame_info,
.frame_addrs = ctx->frame_addrs,
.num_addrs = ctx->num_addrs,
.max_addrs = ctx->max_addrs,
};
if (process_frame_chain(unwinder, &continue_ctx) < 0) {
return -1;
}
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
ctx->num_addrs = continue_ctx.num_addrs;
ctx->last_frame_visited = continue_ctx.last_frame_visited;

STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before_walk);
} else {
// Partial cache hit - cache was validated when stored, so we trust it
// Partial cache hit - cached stack was validated as complete when stored,
// so set last_frame_visited to base_frame_addr for validation in frame_cache_store
ctx->last_frame_visited = ctx->base_frame_addr;
STATS_INC(unwinder, frame_cache_partial_hits);
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(ctx->frame_info) - frames_before_cache);
}
} else {
if (last_profiled_frame == 0) {
// No cache involvement (no last_profiled_frame or cache disabled)
if (ctx->last_profiled_frame == 0) {
STATS_INC(unwinder, frame_cache_misses);
}
}

// Store in cache - frame_cache_store validates internally that we have a
// complete stack (reached base_frame_addr) before actually storing
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
base_frame_addr, last_frame_visited) < 0) {
if (frame_cache_store(unwinder, thread_id, ctx->frame_info, ctx->frame_addrs, ctx->num_addrs,
ctx->base_frame_addr, ctx->last_frame_visited) < 0) {
return -1;
}

@@ -23,6 +23,8 @@ iterate_threads(
) {
uintptr_t thread_state_addr;
unsigned long tid = 0;
const size_t MAX_THREADS = 8192;
size_t thread_count = 0;

if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle,

@@ -34,7 +36,8 @@ iterate_threads(
return -1;
}

while (thread_state_addr != 0) {
while (thread_state_addr != 0 && thread_count < MAX_THREADS) {
thread_count++;
if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle,
thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id,

@@ -425,12 +428,24 @@ unwind_stack_for_thread(
}
}

uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
FrameWalkContext ctx = {
.frame_addr = frame_addr,
.base_frame_addr = base_frame_addr,
.gc_frame = gc_frame,
.chunks = &chunks,
.frame_info = frame_info,
.frame_addrs = addrs,
.num_addrs = 0,
.max_addrs = FRAME_CACHE_MAX_FRAMES,
};
assert(ctx.max_addrs == FRAME_CACHE_MAX_FRAMES);

if (unwinder->cache_frames) {
// Use cache to avoid re-reading unchanged parent frames
uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
ctx.last_profiled_frame = GET_MEMBER(uintptr_t, ts,
unwinder->debug_offsets.thread_state.last_profiled_frame);
if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
if (collect_frames_with_cache(unwinder, &ctx, tid) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
goto error;
}

@@ -443,8 +458,7 @@ unwind_stack_for_thread(
}
} else {
// No caching - process entire frame chain with base_frame validation
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) {
if (process_frame_chain(unwinder, &ctx) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
goto error;
}