Mirror of https://github.com/python/cpython.git, synced 2025-12-23 09:19:18 +00:00
gh-138122: Validate base frame before caching in remote debugging frame cache (#142852)
commit 568a819f67 (parent 2b466c47c3)
6 changed files with 152 additions and 17 deletions
@@ -863,3 +863,98 @@ asyncio.run(supervisor())
         self.assertGreater(cpu_percentage, 90.0,
             f"cpu_leaf should dominate samples in 'running' mode, "
             f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
+
+
+def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
+    """Generate a script with deep nested generators for stress testing."""
+    lines = [
+        'import sys',
+        'sys.setrecursionlimit(5000)',
+        '',
+    ]
+    # Generate chain of yield-from functions
+    for i in range(chain_depth - 1):
+        lines.extend([
+            f'def deep_yield_chain_{i}(n):',
+            f'    yield ("L{i}", n)',
+            f'    yield from deep_yield_chain_{i + 1}(n)',
+            '',
+        ])
+    # Last chain function calls recursive_diver
+    lines.extend([
+        f'def deep_yield_chain_{chain_depth - 1}(n):',
+        f'    yield ("L{chain_depth - 1}", n)',
+        f'    yield from recursive_diver(n, {chain_depth})',
+        '',
+        'def recursive_diver(n, depth):',
+        '    yield (f"DIVE_{depth}", n)',
+        f'    if depth < {recurse_depth}:',
+        '        yield from recursive_diver(n, depth + 1)',
+        '    else:',
+        '        for i in range(5):',
+        '            yield (f"BOTTOM_{depth}", i)',
+        '',
+        'def oscillating_generator(iterations=1000):',
+        '    for i in range(iterations):',
+        '        yield ("OSCILLATE", i)',
+        '        yield from deep_yield_chain_0(i)',
+        '',
+        'def run_forever():',
+        '    while True:',
+        '        for _ in oscillating_generator(10):',
+        '            pass',
+        '',
+        '_test_sock.sendall(b"working")',
+        'run_forever()',
+    ])
+    return '\n'.join(lines)
+
+
+@requires_remote_subprocess_debugging()
+class TestDeepGeneratorFrameCache(unittest.TestCase):
+    """Test frame cache consistency with deep oscillating generator stacks."""
+
+    def test_all_stacks_share_same_base_frame(self):
+        """Verify all sampled stacks reach the entry point function.
+
+        When profiling deep generators that oscillate up and down the call
+        stack, every sample should include the entry point function
+        (run_forever) in its call chain. If the frame cache stores
+        incomplete stacks, some samples will be missing this base function,
+        causing broken flamegraphs.
+        """
+        script = _generate_deep_generators_script()
+        with test_subprocess(script, wait_for_working=True) as subproc:
+            collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)
+
+            with (
+                io.StringIO() as captured_output,
+                mock.patch("sys.stdout", captured_output),
+            ):
+                profiling.sampling.sample.sample(
+                    subproc.process.pid,
+                    collector,
+                    duration_sec=2,
+                )
+
+        samples_with_entry_point = 0
+        samples_without_entry_point = 0
+        total_samples = 0
+
+        for (call_tree, _thread_id), count in collector.stack_counter.items():
+            total_samples += count
+            if call_tree:
+                has_entry_point = call_tree[0][2] == "<module>"
+                if has_entry_point:
+                    samples_with_entry_point += count
+                else:
+                    samples_without_entry_point += count
+
+        self.assertGreater(total_samples, 100,
+            f"Expected at least 100 samples, got {total_samples}")
+
+        self.assertEqual(samples_without_entry_point, 0,
+            f"Found {samples_without_entry_point}/{total_samples} samples "
+            f"missing the entry point function 'run_forever'. This indicates "
+            f"incomplete stacks are being returned, likely due to frame cache "
+            f"storing partial stack traces.")
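For orientation, here is the shape of the stress script the builder above emits, shown with small parameters; this is a sketch derived from the builder (chain_depth=2, recurse_depth=3, with the test harness's _test_sock handshake omitted), not code from the commit:

# Output of _generate_deep_generators_script(chain_depth=2, recurse_depth=3),
# reconstructed from the builder above; the real test uses depths 20 and 150.
import sys
sys.setrecursionlimit(5000)

def deep_yield_chain_0(n):
    yield ("L0", n)
    yield from deep_yield_chain_1(n)

def deep_yield_chain_1(n):
    yield ("L1", n)
    yield from recursive_diver(n, 2)

def recursive_diver(n, depth):
    yield (f"DIVE_{depth}", n)
    if depth < 3:
        yield from recursive_diver(n, depth + 1)
    else:
        for i in range(5):
            yield (f"BOTTOM_{depth}", i)

def oscillating_generator(iterations=1000):
    for i in range(iterations):
        yield ("OSCILLATE", i)            # shallow yield...
        yield from deep_yield_chain_0(i)  # ...then dive deep again

# run_forever() in the real script loops on oscillating_generator(10)
# forever; a bounded drive keeps this sketch terminating:
for _ in oscillating_generator(3):
    pass

Each OSCILLATE yield unwinds back to a shallow stack before the next deep dive, which is exactly the grow-and-shrink pattern that can leave stale partial stacks in the frame cache.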
@@ -0,0 +1,4 @@
+Fix incomplete stack traces in the Tachyon profiler's frame cache when
+profiling code with deeply nested generators. The frame cache now validates
+that stack traces reach the base frame before caching, preventing broken
+flamegraphs. Patch by Pablo Galindo.
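The rule described in this entry is small enough to state as a predicate. A minimal Python model of the new caching rule (the function name and arguments are illustrative stand-ins for the C parameters, not a real API):

def should_cache(base_frame_addr: int, last_frame_visited: int) -> bool:
    """Cache a walked stack only if the walk reached the base frame.

    base_frame_addr == 0 means no base frame is known, in which case the
    validation is skipped (mirroring the C check in frame_cache_store below).
    """
    if base_frame_addr != 0 and last_frame_visited != base_frame_addr:
        return False  # incomplete stack: skip caching, degrade gracefully
    return True

assert should_cache(0x7f10, 0x7f10)      # complete stack -> cacheable
assert not should_cache(0x7f10, 0x2a40)  # walk stopped early -> not cached
assert should_cache(0, 0x2a40)           # no base frame known -> no check

An invalid stack is simply not cached rather than treated as an error, so a truncated walk costs one extra memory read on the next sample instead of poisoning every later sample.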
@@ -429,7 +429,8 @@ extern int process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,
     Py_ssize_t *num_addrs,
-    Py_ssize_t max_addrs
+    Py_ssize_t max_addrs,
+    uintptr_t *out_last_frame_addr
 );

 /* Frame cache functions */
@@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
     Py_ssize_t *num_addrs,
     Py_ssize_t max_addrs);

+// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
+// Only stores complete stacks that reach base_frame_addr
 extern int frame_cache_store(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs);
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited);

 extern int collect_frames_with_cache(
     RemoteUnwinderObject *unwinder,
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id);
@@ -194,6 +194,7 @@ frame_cache_lookup_and_extend(
 }

 // Store frame list with addresses in cache
+// Only stores complete stacks that reach base_frame_addr (validation done internally)
 // Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error
 int
 frame_cache_store(
@@ -201,12 +202,25 @@ frame_cache_store(
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs)
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited)
 {
     if (!unwinder->frame_cache || thread_id == 0) {
         return 0;
     }

+    // Validate we have a complete stack before caching.
+    // Only cache if last_frame_visited matches base_frame_addr (the sentinel
+    // at the bottom of the stack). Note: we use last_frame_visited rather than
+    // addrs[num_addrs-1] because the base frame is visited but not added to the
+    // addrs array (it returns frame==NULL from is_frame_valid due to
+    // owner==FRAME_OWNED_BY_INTERPRETER).
+    if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
+        // Incomplete stack - don't cache (graceful degradation)
+        return 0;
+    }
+
     // Clamp to max frames
     if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
         num_addrs = FRAME_CACHE_MAX_FRAMES;
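Taken together, the updated function validates, clamps, then stores, and only -1 is an error: a return of 0 means the sample is still reported but deliberately not cached. A rough Python model of that flow under the stated contract (the names and the constant value are illustrative, not the real symbols; the real function also stores the frame list alongside the addresses):

FRAME_CACHE_MAX_FRAMES = 128  # illustrative value, not the real constant

def frame_cache_store_model(cache, thread_id, addrs,
                            base_frame_addr, last_frame_visited):
    """Python model of the updated C logic: validate, clamp, store.

    Returns 1 = stored, 0 = not stored (graceful), matching the C contract.
    """
    if cache is None or thread_id == 0:
        return 0
    # New validation: only complete stacks that reached the base frame
    # (tracked via last_frame_visited, since the base frame itself is
    # visited but never appended to addrs).
    if base_frame_addr != 0 and last_frame_visited != base_frame_addr:
        return 0
    cache[thread_id] = addrs[:FRAME_CACHE_MAX_FRAMES]  # clamp to max frames
    return 1

cache = {}
assert frame_cache_store_model(cache, 7, [0x30, 0x20], 0x10, 0x10) == 1
assert frame_cache_store_model(cache, 7, [0x30, 0x20], 0x10, 0x20) == 0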
@@ -265,7 +265,8 @@ process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,   // optional: C array to receive frame addresses
     Py_ssize_t *num_addrs,    // in/out: current count / updated count
-    Py_ssize_t max_addrs)     // max capacity of frame_addrs array
+    Py_ssize_t max_addrs,     // max capacity of frame_addrs array
+    uintptr_t *out_last_frame_addr)  // optional: receives last frame address visited
 {
     uintptr_t frame_addr = initial_frame_addr;
     uintptr_t prev_frame_addr = 0;
@@ -273,10 +274,13 @@ process_frame_chain(
     const size_t MAX_FRAMES = 1024 + 512;
     size_t frame_count = 0;

-    // Initialize output flag
+    // Initialize output parameters
     if (stopped_at_cached_frame) {
         *stopped_at_cached_frame = 0;
     }
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = 0;
+    }

     // Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
     if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
@@ -390,6 +394,11 @@ process_frame_chain(
         return -1;
     }

+    // Set output parameter for caller (needed for cache validation)
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = last_frame_addr;
+    }
+
     return 0;
 }
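The new out_last_frame_addr follows the same optional out-parameter idiom as stopped_at_cached_frame: zero-initialized on entry so it is defined on every path, written once at the end, and skippable by passing NULL. Python has no out-parameters, so this rough model returns the value instead (the frame chain is a toy dict standing in for remote memory; unlike the real walker, the toy also appends the base frame to the visited list):

def process_frame_chain_model(initial_frame_addr, next_frame):
    """Walk a toy frame chain; return (visited_addrs, last_frame_addr).

    last_frame_addr plays the role of *out_last_frame_addr: 0 until the
    walk visits a frame, then the address of the last frame visited.
    """
    last_frame_addr = 0  # mirrors the *out_last_frame_addr = 0 initialization
    visited = []
    frame_addr = initial_frame_addr
    while frame_addr != 0:
        visited.append(frame_addr)
        last_frame_addr = frame_addr
        frame_addr = next_frame.get(frame_addr, 0)
    return visited, last_frame_addr

# Toy chain: 0x30 -> 0x20 -> 0x10 (base frame), then end of chain.
chain = {0x30: 0x20, 0x20: 0x10, 0x10: 0}
visited, last = process_frame_chain_model(0x30, chain)
assert last == 0x10  # the walk reached the base frame: safe to cache

Returning the last visited address, rather than inferring it from the collected array, is what lets the store-side validation work even though the base frame never lands in addrs.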
@@ -537,6 +546,7 @@ collect_frames_with_cache(
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id)
@@ -551,11 +561,13 @@ collect_frames_with_cache(
     uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
     Py_ssize_t num_addrs = 0;
     Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
+    uintptr_t last_frame_visited = 0;

     int stopped_at_cached = 0;
-    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
+    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
                             last_profiled_frame, &stopped_at_cached,
-                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+                            &last_frame_visited) < 0) {
         return -1;
     }
@@ -575,23 +587,28 @@ collect_frames_with_cache(
             // Cache miss - continue walking from last_profiled_frame to get the rest
             STATS_INC(unwinder, frame_cache_misses);
             Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
-            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
-                                    0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
+                                    0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+                                    &last_frame_visited) < 0) {
                 return -1;
             }
             STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
         } else {
-            // Partial cache hit
+            // Partial cache hit - cache was validated when stored, so we trust it
             STATS_INC(unwinder, frame_cache_partial_hits);
             STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
         }
-    } else if (last_profiled_frame == 0) {
-        // No cache involvement (no last_profiled_frame or cache disabled)
-        STATS_INC(unwinder, frame_cache_misses);
+    } else {
+        if (last_profiled_frame == 0) {
+            // No cache involvement (no last_profiled_frame or cache disabled)
+            STATS_INC(unwinder, frame_cache_misses);
+        }
     }

-    // Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
-    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
+    // Store in cache - frame_cache_store validates internally that we have a
+    // complete stack (reached base_frame_addr) before actually storing
+    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
+                          base_frame_addr, last_frame_visited) < 0) {
        return -1;
    }
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
     uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
         unwinder->debug_offsets.thread_state.last_profiled_frame);
     if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
-                                  gc_frame, last_profiled_frame, tid) < 0) {
+                                  base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
         set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
         goto error;
     }
@@ -444,7 +444,7 @@ unwind_stack_for_thread(
     } else {
         // No caching - process entire frame chain with base_frame validation
         if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
-                                base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
+                                base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
             goto error;
         }
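Putting the call-site changes together: unwind_stack_for_thread now threads base_frame_addr through collect_frames_with_cache down into the cache store. A compact end-to-end sketch of the resulting control flow, reusing the two model functions sketched above (still an illustrative Python model, not the real API):

def unwind_stack_for_thread_model(top_frame, base_frame_addr, next_frame,
                                  thread_id, cache):
    """Walk the chain, then cache only if the walk reached base_frame_addr."""
    visited, last_visited = process_frame_chain_model(top_frame, next_frame)
    # frame_cache_store now receives base_frame_addr and last_frame_visited
    # and silently refuses incomplete stacks:
    stored = frame_cache_store_model(cache, thread_id, visited,
                                     base_frame_addr, last_visited)
    return visited, stored

cache = {}
chain = {0x30: 0x20, 0x20: 0x10, 0x10: 0}
# Complete walk down to the base frame 0x10: cached.
_, stored = unwind_stack_for_thread_model(0x30, 0x10, chain, 7, cache)
assert stored == 1
# A walk that never reaches the base frame (truncated chain): not cached,
# so the next sample re-reads memory instead of reusing a partial stack.
_, stored = unwind_stack_for_thread_model(0x30, 0x10, {0x30: 0x20, 0x20: 0},
                                          7, cache)
assert stored == 0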