Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implement mark's idea
  • Loading branch information
pablogsal committed Nov 25, 2025
commit 111e70cb23643b6b917de4c83e125658a293f3ce
10 changes: 6 additions & 4 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,12 @@ struct _ts {
/* Pointer to currently executing frame. */
struct _PyInterpreterFrame *current_frame;

/* Pointer to the entry/bottommost frame of the current call stack.
* This is the frame that was entered when starting execution.
* Used by profiling/sampling to detect incomplete stack traces. */
struct _PyInterpreterFrame *entry_frame;
/* Pointer to the base frame (bottommost sentinel frame).
Used by profilers to validate complete stack unwinding.
Points to the embedded base_frame in _PyThreadStateImpl.
The frame is embedded there rather than here because _PyInterpreterFrame
is defined in internal headers that cannot be exposed in the public API. */
struct _PyInterpreterFrame *base_frame;

Py_tracefunc c_profilefunc;
Py_tracefunc c_tracefunc;
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_debug_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ typedef struct _Py_DebugOffsets {
uint64_t next;
uint64_t interp;
uint64_t current_frame;
uint64_t entry_frame;
uint64_t base_frame;
uint64_t thread_id;
uint64_t native_thread_id;
uint64_t datastack_chunk;
Expand Down Expand Up @@ -273,7 +273,7 @@ typedef struct _Py_DebugOffsets {
.next = offsetof(PyThreadState, next), \
.interp = offsetof(PyThreadState, interp), \
.current_frame = offsetof(PyThreadState, current_frame), \
.entry_frame = offsetof(PyThreadState, entry_frame), \
.base_frame = offsetof(PyThreadState, base_frame), \
.thread_id = offsetof(PyThreadState, thread_id), \
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_interpframe_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum _frameowner {
FRAME_OWNED_BY_GENERATOR = 1,
FRAME_OWNED_BY_FRAME_OBJECT = 2,
FRAME_OWNED_BY_INTERPRETER = 3,
FRAME_OWNED_BY_THREAD_STATE = 4, /* Sentinel base frame in thread state */
};

struct _PyInterpreterFrame {
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

#include "pycore_brc.h" // struct _brc_thread_state
#include "pycore_freelist_state.h" // struct _Py_freelists
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
#include "pycore_qsbr.h" // struct qsbr
#include "pycore_uop.h" // struct _PyUOpInstruction
Expand Down Expand Up @@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl {
// semi-public fields are in PyThreadState.
PyThreadState base;

// Embedded base frame - sentinel at the bottom of the frame stack.
// Used by profiling/sampling to detect incomplete stack traces.
_PyInterpreterFrame base_frame;

// The reference count field is used to synchronize deallocation of the
// thread state during runtime finalization.
Py_ssize_t refcount;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Add incomplete sample detection to prevent corrupted profiling data. The
interpreter now tracks the base frame (bottommost frame) in each thread's
``PyThreadState.base_frame``, which the profiler uses to validate that
stack unwinding reached the expected bottom. Samples that fail to unwind
completely (due to race conditions, memory corruption, or other errors)
are now rejected rather than being included as spurious single-frame stacks.
Add detection of incomplete samples to prevent corrupted profiling data. Each
thread state now contains an embedded base frame (a sentinel at the bottom of
the frame stack) with owner type ``FRAME_OWNED_BY_THREAD_STATE``. The profiler
validates that stack unwinding terminates at this sentinel frame. Samples that
fail to reach the base frame (due to race conditions, memory corruption, or
other errors) are now rejected rather than being included as spurious data.
4 changes: 2 additions & 2 deletions Modules/_remote_debugging/_remote_debugging.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,8 @@ extern int process_frame_chain(
uintptr_t initial_frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t gc_frame,
uintptr_t base_frame_addr
uintptr_t base_frame_addr,
uintptr_t gc_frame
);

/* ============================================================================
Expand Down
37 changes: 18 additions & 19 deletions Modules/_remote_debugging/frames.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,17 @@ is_frame_valid(

void* frame = (void*)frame_addr;

if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner);
if (owner == FRAME_OWNED_BY_INTERPRETER) {
return 0; // C frame
}

if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR
&& GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) {
PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n",
GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner));
if (owner == FRAME_OWNED_BY_THREAD_STATE) {
return 0; // Sentinel base frame - end of stack
}

if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) {
PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
return -1;
}
Expand Down Expand Up @@ -258,28 +261,27 @@ process_frame_chain(
uintptr_t initial_frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
uintptr_t gc_frame,
uintptr_t base_frame_addr)
uintptr_t base_frame_addr,
uintptr_t gc_frame)
{
uintptr_t frame_addr = initial_frame_addr;
uintptr_t prev_frame_addr = 0;
uintptr_t last_frame_addr = 0; // Track the last frame we processed
uintptr_t last_frame_addr = 0; // Track last frame visited for validation
const size_t MAX_FRAMES = 1024;
size_t frame_count = 0;

while ((void*)frame_addr != NULL) {
last_frame_addr = frame_addr; // Remember this frame before moving to next
PyObject *frame = NULL;
uintptr_t next_frame_addr = 0;
uintptr_t stackpointer = 0;
last_frame_addr = frame_addr; // Remember this frame address

if (++frame_count > MAX_FRAMES) {
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded");
return -1;
}

// Try chunks first, fallback to direct memory read
if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
PyErr_Clear();
uintptr_t address_of_code_object = 0;
Expand Down Expand Up @@ -350,15 +352,12 @@ process_frame_chain(
frame_addr = next_frame_addr;
}

// Validate we reached the base frame if it's set
if (base_frame_addr != 0 && last_frame_addr != 0) {
if (last_frame_addr != base_frame_addr) {
// We didn't reach the expected bottom frame - incomplete sample
PyErr_Format(PyExc_RuntimeError,
"Incomplete sample: reached frame 0x%lx but expected base frame 0x%lx",
last_frame_addr, base_frame_addr);
return -1;
}
// Validate we reached the base frame (sentinel at bottom of stack)
if (last_frame_addr != base_frame_addr) {
PyErr_Format(PyExc_RuntimeError,
"Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
base_frame_addr, last_frame_addr);
return -1;
}

return 0;
Expand Down
9 changes: 2 additions & 7 deletions Modules/_remote_debugging/threads.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ unwind_stack_for_thread(
}

uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame);
uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame);

frame_info = PyList_New(0);
if (!frame_info) {
Expand All @@ -388,13 +389,7 @@ unwind_stack_for_thread(
goto error;
}

// Read base_frame for validation
uintptr_t base_frame_addr = 0;
if (unwinder->debug_offsets.thread_state.base_frame != 0) {
base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame);
}

if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame, base_frame_addr) < 0) {
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, base_frame_addr, gc_frame) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
goto error;
}
Expand Down
8 changes: 0 additions & 8 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1234,10 +1234,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
entry.frame.previous = tstate->current_frame;
frame->previous = &entry.frame;
tstate->current_frame = frame;
/* Track entry frame for profiling/sampling */
if (entry.frame.previous == NULL) {
tstate->entry_frame = &entry.frame;
}
entry.frame.localsplus[0] = PyStackRef_NULL;
#ifdef _Py_TIER2
if (tstate->current_executor != NULL) {
Expand Down Expand Up @@ -1304,10 +1300,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
/* Restore previous frame and exit */
tstate->current_frame = frame->previous;
/* Clear entry frame if we're returning to no frame */
if (tstate->current_frame == NULL) {
tstate->entry_frame = NULL;
}
return NULL;
}
#ifdef _Py_TIER2
Expand Down
27 changes: 25 additions & 2 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1482,8 +1482,31 @@ init_threadstate(_PyThreadStateImpl *_tstate,
// This is cleared when PyGILState_Ensure() creates the thread state.
tstate->gilstate_counter = 1;

tstate->current_frame = NULL;
tstate->entry_frame = NULL;
// Initialize the embedded base frame - sentinel at the bottom of the frame stack
_tstate->base_frame.previous = NULL;
_tstate->base_frame.f_executable = PyStackRef_None;
_tstate->base_frame.f_funcobj = PyStackRef_NULL;
_tstate->base_frame.f_globals = NULL;
_tstate->base_frame.f_builtins = NULL;
_tstate->base_frame.f_locals = NULL;
_tstate->base_frame.frame_obj = NULL;
_tstate->base_frame.instr_ptr = NULL;
_tstate->base_frame.stackpointer = _tstate->base_frame.localsplus;
_tstate->base_frame.return_offset = 0;
_tstate->base_frame.owner = FRAME_OWNED_BY_THREAD_STATE;
_tstate->base_frame.visited = 0;
#ifdef Py_DEBUG
_tstate->base_frame.lltrace = 0;
#endif
#ifdef Py_GIL_DISABLED
_tstate->base_frame.tlbc_index = 0;
#endif
_tstate->base_frame.localsplus[0] = PyStackRef_NULL;

// current_frame starts pointing to the base frame
tstate->current_frame = &_tstate->base_frame;
// base_frame pointer for profilers to validate stack unwinding
tstate->base_frame = &_tstate->base_frame;
tstate->datastack_chunk = NULL;
tstate->datastack_top = NULL;
tstate->datastack_limit = NULL;
Expand Down
5 changes: 3 additions & 2 deletions Python/traceback.c
Original file line number Diff line number Diff line change
Expand Up @@ -1035,8 +1035,9 @@ _Py_DumpWideString(int fd, wchar_t *str)
static int
dump_frame(int fd, _PyInterpreterFrame *frame)
{
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
/* Ignore trampoline frame */
if (frame->owner == FRAME_OWNED_BY_INTERPRETER ||
frame->owner == FRAME_OWNED_BY_THREAD_STATE) {
/* Ignore trampoline frames and base frame sentinel */
return 0;
}

Expand Down
Loading