Skip to content

Commit 224dede

Browse files
committed
gh-138122: Validate base frame before caching in remote debugging frame cache
The frame cache in the remote debugging module was storing frame chains without validating that they reached the base frame. This could happen when a frame chain was interrupted or when the process state changed during reading, resulting in incomplete stacks being cached. Subsequent samples that hit the cache would then produce flamegraphs that didn't reach the bottom of the call stack. The fix passes base_frame_addr through to process_frame_chain() which already has validation logic to ensure the frame walk terminates at the expected sentinel frame. By enabling this validation in the caching code path and tracking whether we've confirmed reaching the base frame, we now only store complete frame chains in the cache. When extending from cached data, we trust that the cached frames were already validated at storage time, maintaining the invariant that cached stacks are always complete. An integration test using deeply nested generators that oscillate the stack depth is added to verify that all sampled stacks contain the entry point function. This catches regressions where incomplete stacks might be cached and returned.
1 parent 790a46a commit 224dede

File tree

5 files changed

+126
-10
lines changed

5 files changed

+126
-10
lines changed

‎Lib/test/test_profiling/test_sampling_profiler/test_integration.py‎

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,3 +863,100 @@ def test_async_aware_running_sees_only_cpu_task(self):
863863
self.assertGreater(cpu_percentage, 90.0,
864864
f"cpu_leaf should dominate samples in 'running' mode, "
865865
f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
866+
867+
868+
def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
869+
"""Generate a script with deep nested generators for stress testing."""
870+
lines = [
871+
'import sys',
872+
'sys.setrecursionlimit(5000)',
873+
'',
874+
]
875+
# Generate chain of yield-from functions
876+
for i in range(chain_depth - 1):
877+
lines.extend([
878+
f'def deep_yield_chain_{i}(n):',
879+
f' yield ("L{i}", n)',
880+
f' yield from deep_yield_chain_{i + 1}(n)',
881+
'',
882+
])
883+
# Last chain function calls recursive_diver
884+
lines.extend([
885+
f'def deep_yield_chain_{chain_depth - 1}(n):',
886+
f' yield ("L{chain_depth - 1}", n)',
887+
f' yield from recursive_diver(n, {chain_depth})',
888+
'',
889+
'def recursive_diver(n, depth):',
890+
' yield (f"DIVE_{depth}", n)',
891+
f' if depth < {recurse_depth}:',
892+
' yield from recursive_diver(n, depth + 1)',
893+
' else:',
894+
' for i in range(5):',
895+
' yield (f"BOTTOM_{depth}", i)',
896+
'',
897+
'def oscillating_generator(iterations=1000):',
898+
' for i in range(iterations):',
899+
' yield ("OSCILLATE", i)',
900+
' yield from deep_yield_chain_0(i)',
901+
'',
902+
'def run_forever():',
903+
' while True:',
904+
' for _ in oscillating_generator(10):',
905+
' pass',
906+
'',
907+
'_test_sock.sendall(b"working")',
908+
'run_forever()',
909+
])
910+
return '\n'.join(lines)
911+
912+
913+
@requires_remote_subprocess_debugging()
914+
class TestDeepGeneratorFrameCache(unittest.TestCase):
915+
"""Test frame cache consistency with deep oscillating generator stacks."""
916+
917+
def test_all_stacks_share_same_base_frame(self):
918+
"""Verify all sampled stacks reach the entry point function.
919+
920+
When profiling deep generators that oscillate up and down the call
921+
stack, every sample should include the entry point function
922+
(run_forever) in its call chain. If the frame cache stores
923+
incomplete stacks, some samples will be missing this base function,
924+
causing broken flamegraphs.
925+
"""
926+
script = _generate_deep_generators_script()
927+
with test_subprocess(script, wait_for_working=True) as subproc:
928+
collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)
929+
930+
with (
931+
io.StringIO() as captured_output,
932+
mock.patch("sys.stdout", captured_output),
933+
):
934+
profiling.sampling.sample.sample(
935+
subproc.process.pid,
936+
collector,
937+
duration_sec=2,
938+
)
939+
940+
samples_with_entry_point = 0
941+
samples_without_entry_point = 0
942+
total_samples = 0
943+
944+
for (call_tree, _thread_id), count in collector.stack_counter.items():
945+
total_samples += count
946+
if call_tree:
947+
has_entry_point = any(
948+
frame[2] == "run_forever" for frame in call_tree
949+
)
950+
if has_entry_point:
951+
samples_with_entry_point += count
952+
else:
953+
samples_without_entry_point += count
954+
955+
self.assertGreater(total_samples, 100,
956+
f"Expected at least 100 samples, got {total_samples}")
957+
958+
self.assertEqual(samples_without_entry_point, 0,
959+
f"Found {samples_without_entry_point}/{total_samples} samples "
960+
f"missing the entry point function 'run_forever'. This indicates "
961+
f"incomplete stacks are being returned, likely due to frame cache "
962+
f"storing partial stack traces.")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix incomplete stack traces in the Tachyon profiler's frame cache when
2+
profiling code with deeply nested generators. The frame cache now validates
3+
that stack traces reach the base frame before caching, preventing broken
4+
flamegraphs. Patch by Pablo Galindo.

‎Modules/_remote_debugging/_remote_debugging.h‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ extern int collect_frames_with_cache(
459459
uintptr_t frame_addr,
460460
StackChunkList *chunks,
461461
PyObject *frame_info,
462+
uintptr_t base_frame_addr,
462463
uintptr_t gc_frame,
463464
uintptr_t last_profiled_frame,
464465
uint64_t thread_id);

‎Modules/_remote_debugging/frames.c‎

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ collect_frames_with_cache(
537537
uintptr_t frame_addr,
538538
StackChunkList *chunks,
539539
PyObject *frame_info,
540+
uintptr_t base_frame_addr,
540541
uintptr_t gc_frame,
541542
uintptr_t last_profiled_frame,
542543
uint64_t thread_id)
@@ -552,8 +553,11 @@ collect_frames_with_cache(
552553
Py_ssize_t num_addrs = 0;
553554
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
554555

556+
// Track whether we've validated reaching the base frame (either directly or via cache)
557+
int reached_base_frame = 0;
558+
555559
int stopped_at_cached = 0;
556-
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
560+
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
557561
last_profiled_frame, &stopped_at_cached,
558562
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
559563
return -1;
@@ -575,24 +579,34 @@ collect_frames_with_cache(
575579
// Cache miss - continue walking from last_profiled_frame to get the rest
576580
STATS_INC(unwinder, frame_cache_misses);
577581
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
578-
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
582+
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
579583
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
580584
return -1;
581585
}
582586
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
587+
// We walked to base frame (process_frame_chain validated it)
588+
reached_base_frame = 1;
583589
} else {
584-
// Partial cache hit
590+
// Partial cache hit - cache was validated when stored, so we trust it
585591
STATS_INC(unwinder, frame_cache_partial_hits);
586592
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
593+
reached_base_frame = 1;
594+
}
595+
} else {
596+
// Walked entire chain without stopping at cache - process_frame_chain validated base frame
597+
reached_base_frame = 1;
598+
if (last_profiled_frame == 0) {
599+
// No cache involvement (no last_profiled_frame or cache disabled)
600+
STATS_INC(unwinder, frame_cache_misses);
587601
}
588-
} else if (last_profiled_frame == 0) {
589-
// No cache involvement (no last_profiled_frame or cache disabled)
590-
STATS_INC(unwinder, frame_cache_misses);
591602
}
592603

593-
// Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
594-
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
595-
return -1;
604+
// Only store in cache if we reached the base frame (complete stack)
605+
// This prevents caching incomplete stacks that would produce broken flamegraphs
606+
if (reached_base_frame) {
607+
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
608+
return -1;
609+
}
596610
}
597611

598612
return 0;

‎Modules/_remote_debugging/threads.c‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
430430
uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
431431
unwinder->debug_offsets.thread_state.last_profiled_frame);
432432
if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
433-
gc_frame, last_profiled_frame, tid) < 0) {
433+
base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
434434
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
435435
goto error;
436436
}

0 commit comments

Comments
 (0)