mirror of
https://github.com/python/cpython.git
synced 2025-12-23 09:19:18 +00:00
Merge bb4129f83c into f9704f1d84
This commit is contained in:
commit
02a3eef715
6 changed files with 418 additions and 63 deletions
|
|
@ -272,33 +272,85 @@ byte.
|
|||
|
||||
## Frame Table
|
||||
|
||||
The frame table stores deduplicated frame entries:
|
||||
The frame table stores deduplicated frame entries with full source position
|
||||
information and bytecode opcode:
|
||||
|
||||
```
|
||||
+----------------------+
|
||||
| filename_idx: varint |
|
||||
| funcname_idx: varint |
|
||||
| lineno: svarint |
|
||||
+----------------------+ (repeated for each frame)
|
||||
+----------------------------+
|
||||
| filename_idx: varint |
|
||||
| funcname_idx: varint |
|
||||
| lineno: svarint |
|
||||
| end_lineno_delta: svarint |
|
||||
| column: svarint |
|
||||
| end_column_delta: svarint |
|
||||
| opcode: u8 |
|
||||
+----------------------------+ (repeated for each frame)
|
||||
```
|
||||
|
||||
Each unique (filename, funcname, lineno) combination gets one entry. Two
|
||||
calls to the same function at different line numbers produce different
|
||||
frame entries; two calls at the same line number share one entry.
|
||||
### Field Definitions
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| filename_idx | varint | Index into string table for file name |
|
||||
| funcname_idx | varint | Index into string table for function name |
|
||||
| lineno | zigzag varint | Start line number (-1 for synthetic frames) |
|
||||
| end_lineno_delta | zigzag varint | Delta from lineno (end_lineno = lineno + delta) |
|
||||
| column | zigzag varint | Start column offset in UTF-8 bytes (-1 if not available) |
|
||||
| end_column_delta | zigzag varint | Delta from column (end_column = column + delta) |
|
||||
| opcode | u8 | Python bytecode opcode (0-254) or 255 for None |
|
||||
|
||||
### Delta Encoding
|
||||
|
||||
Position end values use delta encoding for efficiency:
|
||||
|
||||
- `end_lineno = lineno + end_lineno_delta`
|
||||
- `end_column = column + end_column_delta`
|
||||
|
||||
Typical values:
|
||||
- `end_lineno_delta`: Usually 0 (single-line expressions) → encodes to 1 byte
|
||||
- `end_column_delta`: Usually 5-20 (expression width) → encodes to 1 byte
|
||||
|
||||
This saves ~1-2 bytes per frame compared to absolute encoding. When the base
|
||||
value (lineno or column) is -1 (not available), the delta is stored as 0 and
|
||||
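the reconstructed end value is also -1. If only the end value is unavailable,
the delta is likewise stored as 0, so the end value reads back equal to the
base value.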
|
||||
|
||||
### Sentinel Values
|
||||
|
||||
- `opcode = 255`: No opcode captured
|
||||
- `lineno = -1`: Synthetic frame (no source location)
|
||||
- `column = -1`: Column offset not available
|
||||
|
||||
### Deduplication
|
||||
|
||||
Each unique (filename, funcname, lineno, end_lineno, column, end_column,
|
||||
opcode) combination gets one entry. This enables instruction-level profiling
|
||||
where multiple bytecode instructions on the same line can be distinguished.
|
||||
|
||||
Strings and frames are deduplicated separately because they have different
|
||||
cardinalities and reference patterns. A codebase might have hundreds of
|
||||
unique source files but thousands of unique functions. Many functions share
|
||||
the same filename, so storing the filename index in each frame entry (rather
|
||||
than the full string) provides an additional layer of deduplication. A frame
|
||||
entry is just three varints (typically 3-6 bytes) rather than two full
|
||||
strings plus a line number.
|
||||
entry is typically 7-9 bytes rather than two full strings plus location data.
|
||||
|
||||
Line numbers use signed varint (zigzag encoding) rather than unsigned to
|
||||
handle edge cases. Synthetic frames—generated frames that don't correspond
|
||||
directly to Python source code, such as C extension boundaries or internal
|
||||
interpreter frames—use line number 0 or -1 to indicate the absence of a
|
||||
source location. Zigzag encoding ensures these small negative values encode
|
||||
### Size Analysis
|
||||
|
||||
Typical frame size with delta encoding:
|
||||
- filename_idx: 1-2 bytes
|
||||
- funcname_idx: 1-2 bytes
|
||||
- lineno: 1-2 bytes
|
||||
- end_lineno_delta: 1 byte (usually 0)
|
||||
- column: 1 byte (usually < 64)
|
||||
- end_column_delta: 1 byte (usually < 64)
|
||||
- opcode: 1 byte
|
||||
|
||||
**Total: ~7-9 bytes per frame**
|
||||
|
||||
Line numbers and columns use signed varint (zigzag encoding) to handle
|
||||
sentinel values efficiently. Synthetic frames—generated frames that don't
|
||||
correspond directly to Python source code, such as C extension boundaries or
|
||||
internal interpreter frames—use -1 to indicate the absence of a source
|
||||
location. Zigzag encoding ensures these small negative values encode
|
||||
efficiently (−1 becomes 1, which is one byte) rather than requiring the
|
||||
maximum varint length.
|
||||
|
||||
|
|
|
|||
|
|
@ -649,7 +649,7 @@ def _validate_args(args, parser):
|
|||
)
|
||||
|
||||
# Validate --opcodes is only used with compatible formats
|
||||
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
|
||||
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap", "binary")
|
||||
if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats:
|
||||
parser.error(
|
||||
f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
|
||||
|
|
|
|||
|
|
@ -29,10 +29,17 @@ except ImportError:
|
|||
)
|
||||
|
||||
|
||||
def make_frame(filename, lineno, funcname):
|
||||
"""Create a FrameInfo struct sequence."""
|
||||
location = LocationInfo((lineno, lineno, -1, -1))
|
||||
return FrameInfo((filename, location, funcname, None))
|
||||
def make_frame(filename, lineno, funcname, end_lineno=None, column=None,
               end_column=None, opcode=None):
    """Build a FrameInfo carrying a complete source span and an opcode.

    An omitted ``end_lineno`` falls back to ``lineno``; an omitted
    ``column`` or ``end_column`` falls back to 0.  ``opcode`` is passed
    through unchanged (``None`` means no opcode).
    """
    span = (
        lineno,
        lineno if end_lineno is None else end_lineno,
        0 if column is None else column,
        0 if end_column is None else end_column,
    )
    return FrameInfo((filename, LocationInfo(span), funcname, opcode))
|
||||
|
||||
|
||||
def make_thread(thread_id, frames, status=0):
|
||||
|
|
@ -54,6 +61,22 @@ def extract_lineno(location):
|
|||
return location
|
||||
|
||||
|
||||
def extract_location(location):
    """Normalize a frame location into a dict of four position fields.

    Accepted inputs:

    * ``None`` or an empty tuple -- every field is 0.
    * A tuple with at least four items, read as
      ``(lineno, end_lineno, column, end_column)``; ``None`` items become 0.
    * A shorter, old-style tuple whose first item is the line number.
    * A bare line number.

    For the last two forms ``end_lineno`` mirrors ``lineno`` and both
    columns are 0.

    Returns a dict with the keys ``lineno``, ``end_lineno``, ``column`` and
    ``end_column``.
    """
    keys = ("lineno", "end_lineno", "column", "end_column")
    if location is None:
        return dict.fromkeys(keys, 0)

    if isinstance(location, tuple):
        if len(location) >= 4:
            # zip() stops after the four keys, so extra items are ignored.
            return {
                key: (0 if value is None else value)
                for key, value in zip(keys, location)
            }
        # Old-style short tuple: only the first item is meaningful.  An
        # empty tuple carries no information, so treat it like None rather
        # than raising IndexError.
        lineno = location[0] if location else 0
    else:
        lineno = location

    lineno = lineno or 0
    return {"lineno": lineno, "end_lineno": lineno, "column": 0, "end_column": 0}
|
||||
|
||||
|
||||
class RawCollector:
|
||||
"""Collector that captures all raw data grouped by thread."""
|
||||
|
||||
|
|
@ -70,11 +93,16 @@ class RawCollector:
|
|||
for thread in interp.threads:
|
||||
frames = []
|
||||
for frame in thread.frame_info:
|
||||
loc = extract_location(frame.location)
|
||||
frames.append(
|
||||
{
|
||||
"filename": frame.filename,
|
||||
"funcname": frame.funcname,
|
||||
"lineno": extract_lineno(frame.location),
|
||||
"lineno": loc["lineno"],
|
||||
"end_lineno": loc["end_lineno"],
|
||||
"column": loc["column"],
|
||||
"end_column": loc["end_column"],
|
||||
"opcode": frame.opcode,
|
||||
}
|
||||
)
|
||||
key = (interp.interpreter_id, thread.thread_id)
|
||||
|
|
@ -95,11 +123,16 @@ def samples_to_by_thread(samples):
|
|||
for thread in interp.threads:
|
||||
frames = []
|
||||
for frame in thread.frame_info:
|
||||
loc = extract_location(frame.location)
|
||||
frames.append(
|
||||
{
|
||||
"filename": frame.filename,
|
||||
"funcname": frame.funcname,
|
||||
"lineno": extract_lineno(frame.location),
|
||||
"lineno": loc["lineno"],
|
||||
"end_lineno": loc["end_lineno"],
|
||||
"column": loc["column"],
|
||||
"end_column": loc["end_column"],
|
||||
"opcode": frame.opcode,
|
||||
}
|
||||
)
|
||||
key = (interp.interpreter_id, thread.thread_id)
|
||||
|
|
@ -206,6 +239,34 @@ class BinaryFormatTestBase(unittest.TestCase):
|
|||
f"frame {j}: lineno mismatch "
|
||||
f"(expected {exp_frame['lineno']}, got {act_frame['lineno']})",
|
||||
)
|
||||
self.assertEqual(
|
||||
exp_frame["end_lineno"],
|
||||
act_frame["end_lineno"],
|
||||
f"Thread ({interp_id}, {thread_id}), sample {i}, "
|
||||
f"frame {j}: end_lineno mismatch "
|
||||
f"(expected {exp_frame['end_lineno']}, got {act_frame['end_lineno']})",
|
||||
)
|
||||
self.assertEqual(
|
||||
exp_frame["column"],
|
||||
act_frame["column"],
|
||||
f"Thread ({interp_id}, {thread_id}), sample {i}, "
|
||||
f"frame {j}: column mismatch "
|
||||
f"(expected {exp_frame['column']}, got {act_frame['column']})",
|
||||
)
|
||||
self.assertEqual(
|
||||
exp_frame["end_column"],
|
||||
act_frame["end_column"],
|
||||
f"Thread ({interp_id}, {thread_id}), sample {i}, "
|
||||
f"frame {j}: end_column mismatch "
|
||||
f"(expected {exp_frame['end_column']}, got {act_frame['end_column']})",
|
||||
)
|
||||
self.assertEqual(
|
||||
exp_frame["opcode"],
|
||||
act_frame["opcode"],
|
||||
f"Thread ({interp_id}, {thread_id}), sample {i}, "
|
||||
f"frame {j}: opcode mismatch "
|
||||
f"(expected {exp_frame['opcode']}, got {act_frame['opcode']})",
|
||||
)
|
||||
|
||||
|
||||
class TestBinaryRoundTrip(BinaryFormatTestBase):
|
||||
|
|
@ -484,6 +545,97 @@ class TestBinaryRoundTrip(BinaryFormatTestBase):
|
|||
self.assertEqual(count, 60)
|
||||
self.assert_samples_equal(samples, collector)
|
||||
|
||||
def test_full_location_roundtrip(self):
    """A stack with explicit end lines and column spans roundtrips intact."""
    # (funcname, lineno, end_lineno, column, end_column)
    spans = (
        ("func1", 10, 12, 4, 20),
        ("func2", 20, 20, 8, 45),
        ("func3", 30, 35, 0, 100),
    )
    stack = [
        make_frame("test.py", start, name, end_lineno=stop, column=col,
                   end_column=end_col)
        for name, start, stop, col, end_col in spans
    ]
    samples = [[make_interpreter(0, [make_thread(1, stack)])]]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, 1)
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_opcode_roundtrip(self):
    """A spread of opcode values from 0 to 254 roundtrips unchanged."""
    valid_opcodes = (0, 1, 50, 100, 150, 200, 254)
    # One sample per opcode, each holding a single-frame stack.
    samples = [
        [make_interpreter(
            0, [make_thread(1, [make_frame("test.py", 10, "func", opcode=op)])]
        )]
        for op in valid_opcodes
    ]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, len(valid_opcodes))
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_opcode_none_roundtrip(self):
    """A frame without an opcode comes back with opcode None."""
    no_opcode_frame = make_frame("test.py", 10, "func", opcode=None)
    thread = make_thread(1, [no_opcode_frame])
    samples = [[make_interpreter(0, [thread])]]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, 1)
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_mixed_location_and_opcode(self):
    """Frames mixing full spans with present and absent opcodes roundtrip."""
    # (filename, lineno, funcname, end_lineno, column, end_column, opcode)
    rows = (
        ("a.py", 10, "a", 15, 4, 30, 100),
        ("b.py", 20, "b", 20, 0, 50, None),
        ("c.py", 30, "c", 32, 8, 25, 50),
    )
    stack = [
        make_frame(path, start, name, end_lineno=stop, column=col,
                   end_column=end_col, opcode=op)
        for path, start, name, stop, col, end_col, op in rows
    ]
    samples = [[make_interpreter(0, [make_thread(1, stack)])]]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, 1)
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_delta_encoding_multiline(self):
    """Spans of 0, 10 and 500 lines all roundtrip correctly."""
    # end_lineno is stored as a delta from lineno, so cover a zero, a
    # small and a large delta.
    # (lineno, funcname, end_lineno, end_column)
    cases = (
        (1, "small", 1, 10),
        (100, "medium", 110, 50),
        (1000, "large", 1500, 200),
    )
    stack = [
        make_frame("test.py", start, name, end_lineno=stop, column=0,
                   end_column=end_col)
        for start, name, stop, end_col in cases
    ]
    samples = [[make_interpreter(0, [make_thread(1, stack)])]]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, 1)
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_column_positions_preserved(self):
    """Several (column, end_column) pairs come back exactly as written."""
    column_pairs = ((0, 10), (4, 50), (8, 100), (100, 200))
    # One sample per pair, each holding a single-frame stack.
    samples = [
        [make_interpreter(
            0,
            [make_thread(1, [make_frame("test.py", 10, "func",
                                        column=start, end_column=stop)])],
        )]
        for start, stop in column_pairs
    ]
    replayed, total = self.roundtrip(samples)
    self.assertEqual(total, len(column_pairs))
    self.assert_samples_equal(samples, replayed)
|
||||
|
||||
def test_same_line_different_opcodes(self):
    """Frames differing only by opcode are not collapsed into one."""
    # The opcode is part of the frame key, so the three frames below must
    # stay distinct entries.
    distinct = [
        make_frame("test.py", 10, "func", opcode=op) for op in (100, 101, 102)
    ]
    # A single sample holding one interpreter entry per frame.
    interpreters = [
        make_interpreter(0, [make_thread(1, [frame])]) for frame in distinct
    ]
    samples = [interpreters]
    collector, total = self.roundtrip(samples)
    # Only the replayed count is checked here.
    self.assertEqual(total, 3)
|
||||
|
||||
def test_same_line_different_columns(self):
    """Frames differing only by column span are not collapsed into one."""
    distinct = [
        make_frame("test.py", 10, "func", column=start, end_column=stop)
        for start, stop in ((0, 10), (15, 25), (30, 40))
    ]
    # A single sample holding one interpreter entry per frame.
    interpreters = [
        make_interpreter(0, [make_thread(1, [frame])]) for frame in distinct
    ]
    samples = [interpreters]
    collector, total = self.roundtrip(samples)
    # Only the replayed count is checked here.
    self.assertEqual(total, 3)
|
||||
|
||||
|
||||
class TestBinaryEdgeCases(BinaryFormatTestBase):
|
||||
"""Tests for edge cases in binary format."""
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ extern "C" {
|
|||
#define BINARY_FORMAT_MAGIC_SWAPPED 0x48434154 /* Byte-swapped magic for endianness detection */
|
||||
#define BINARY_FORMAT_VERSION 1
|
||||
|
||||
/* Sentinel values for optional frame fields */
|
||||
#define OPCODE_NONE 255 /* No opcode captured (u8 sentinel) */
|
||||
#define LOCATION_NOT_AVAILABLE (-1) /* lineno/column not available (zigzag sentinel) */
|
||||
|
||||
/* Conditional byte-swap macros for cross-endian file reading.
|
||||
* Uses Python's optimized byte-swap functions from pycore_bitutils.h */
|
||||
#define SWAP16_IF(swap, x) ((swap) ? _Py_bswap16(x) : (x))
|
||||
|
|
@ -172,18 +176,28 @@ typedef struct {
|
|||
size_t compressed_buffer_size;
|
||||
} ZstdCompressor;
|
||||
|
||||
/* Frame entry - combines all frame data for better cache locality */
|
||||
/* Frame entry - combines all frame data for better cache locality.
|
||||
* Stores full source position (line, end_line, column, end_column) and opcode.
|
||||
* Delta values are computed during serialization for efficiency. */
|
||||
typedef struct {
|
||||
uint32_t filename_idx;
|
||||
uint32_t funcname_idx;
|
||||
int32_t lineno;
|
||||
int32_t lineno; /* Start line number (-1 for synthetic frames) */
|
||||
int32_t end_lineno; /* End line number (-1 if not available) */
|
||||
int32_t column; /* Start column in UTF-8 bytes (-1 if not available) */
|
||||
int32_t end_column; /* End column in UTF-8 bytes (-1 if not available) */
|
||||
uint8_t opcode; /* Python opcode (0-254) or OPCODE_NONE (255) */
|
||||
} FrameEntry;
|
||||
|
||||
/* Frame key for hash table lookup */
|
||||
/* Frame key for hash table lookup - includes all fields for proper deduplication.
 *
 * Two keys are equal only when every field matches (see
 * frame_key_compare_func), and every field is mixed into the hash (see
 * frame_key_hash_func).
 *
 * NOTE: the field order is significant. writer_intern_frame() builds its
 * lookup key with a positional initializer, so reordering the members would
 * silently assign values to the wrong fields. */
typedef struct {
    uint32_t filename_idx;  /* Filename index, checked against strings_count by the reader */
    uint32_t funcname_idx;  /* Function name index, checked against strings_count by the reader */
    int32_t lineno;         /* Start line, or LOCATION_NOT_AVAILABLE (-1) */
    int32_t end_lineno;     /* End line, or LOCATION_NOT_AVAILABLE (-1) */
    int32_t column;         /* Start column, or LOCATION_NOT_AVAILABLE (-1) */
    int32_t end_column;     /* End column, or LOCATION_NOT_AVAILABLE (-1) */
    uint8_t opcode;         /* Opcode 0-254, or OPCODE_NONE (255) */
} FrameKey;
|
||||
|
||||
/* Pending RLE sample - buffered for run-length encoding */
|
||||
|
|
@ -305,8 +319,8 @@ typedef struct {
|
|||
PyObject **strings;
|
||||
uint32_t strings_count;
|
||||
|
||||
/* Parsed frame table: packed as [filename_idx, funcname_idx, lineno] */
|
||||
uint32_t *frame_data;
|
||||
/* Parsed frame table: array of FrameEntry structures */
|
||||
FrameEntry *frames;
|
||||
uint32_t frames_count;
|
||||
|
||||
/* Sample data region */
|
||||
|
|
|
|||
|
|
@ -276,47 +276,86 @@ reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file
|
|||
static inline int
|
||||
reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_size)
|
||||
{
|
||||
/* Check for integer overflow in allocation size calculation.
|
||||
Only needed on 32-bit where SIZE_MAX can be exceeded by uint32_t * 12. */
|
||||
/* Check for integer overflow in allocation size calculation. */
|
||||
#if SIZEOF_SIZE_T < 8
|
||||
if (reader->frames_count > SIZE_MAX / (3 * sizeof(uint32_t))) {
|
||||
if (reader->frames_count > SIZE_MAX / sizeof(FrameEntry)) {
|
||||
PyErr_SetString(PyExc_OverflowError, "Frame count too large for allocation");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t alloc_size = (size_t)reader->frames_count * 3 * sizeof(uint32_t);
|
||||
reader->frame_data = PyMem_Malloc(alloc_size);
|
||||
if (!reader->frame_data && reader->frames_count > 0) {
|
||||
size_t alloc_size = (size_t)reader->frames_count * sizeof(FrameEntry);
|
||||
reader->frames = PyMem_Malloc(alloc_size);
|
||||
if (!reader->frames && reader->frames_count > 0) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t offset = reader->frame_table_offset;
|
||||
for (uint32_t i = 0; i < reader->frames_count; i++) {
|
||||
size_t base = (size_t)i * 3;
|
||||
FrameEntry *frame = &reader->frames[i];
|
||||
size_t prev_offset;
|
||||
|
||||
prev_offset = offset;
|
||||
reader->frame_data[base] = decode_varint_u32(data, &offset, file_size);
|
||||
frame->filename_idx = decode_varint_u32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (filename)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
prev_offset = offset;
|
||||
reader->frame_data[base + 1] = decode_varint_u32(data, &offset, file_size);
|
||||
frame->funcname_idx = decode_varint_u32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (funcname)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
prev_offset = offset;
|
||||
reader->frame_data[base + 2] = (uint32_t)decode_varint_i32(data, &offset, file_size);
|
||||
frame->lineno = decode_varint_i32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (lineno)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
prev_offset = offset;
|
||||
int32_t end_lineno_delta = decode_varint_i32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (end_lineno_delta)");
|
||||
return -1;
|
||||
}
|
||||
/* Reconstruct end_lineno from delta. If lineno is -1, result is -1. */
|
||||
if (frame->lineno == LOCATION_NOT_AVAILABLE) {
|
||||
frame->end_lineno = LOCATION_NOT_AVAILABLE;
|
||||
} else {
|
||||
frame->end_lineno = frame->lineno + end_lineno_delta;
|
||||
}
|
||||
|
||||
prev_offset = offset;
|
||||
frame->column = decode_varint_i32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (column)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
prev_offset = offset;
|
||||
int32_t end_column_delta = decode_varint_i32(data, &offset, file_size);
|
||||
if (offset == prev_offset) {
|
||||
PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (end_column_delta)");
|
||||
return -1;
|
||||
}
|
||||
/* Reconstruct end_column from delta. If column is -1, result is -1. */
|
||||
if (frame->column == LOCATION_NOT_AVAILABLE) {
|
||||
frame->end_column = LOCATION_NOT_AVAILABLE;
|
||||
} else {
|
||||
frame->end_column = frame->column + end_column_delta;
|
||||
}
|
||||
|
||||
/* Read opcode byte */
|
||||
if (offset >= file_size) {
|
||||
PyErr_SetString(PyExc_ValueError, "Unexpected end of frame table (opcode)");
|
||||
return -1;
|
||||
}
|
||||
frame->opcode = data[offset++];
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -683,13 +722,10 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader,
|
|||
goto error;
|
||||
}
|
||||
|
||||
size_t base = frame_idx * 3;
|
||||
uint32_t filename_idx = reader->frame_data[base];
|
||||
uint32_t funcname_idx = reader->frame_data[base + 1];
|
||||
int32_t lineno = (int32_t)reader->frame_data[base + 2];
|
||||
FrameEntry *frame = &reader->frames[frame_idx];
|
||||
|
||||
if (filename_idx >= reader->strings_count ||
|
||||
funcname_idx >= reader->strings_count) {
|
||||
if (frame->filename_idx >= reader->strings_count ||
|
||||
frame->funcname_idx >= reader->strings_count) {
|
||||
PyErr_SetString(PyExc_ValueError, "Invalid string index in frame");
|
||||
goto error;
|
||||
}
|
||||
|
|
@ -699,9 +735,14 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader,
|
|||
goto error;
|
||||
}
|
||||
|
||||
/* Build location tuple with full position info */
|
||||
PyObject *location;
|
||||
if (lineno > 0) {
|
||||
location = Py_BuildValue("(iiii)", lineno, lineno, 0, 0);
|
||||
if (frame->lineno != LOCATION_NOT_AVAILABLE) {
|
||||
location = Py_BuildValue("(iiii)",
|
||||
frame->lineno,
|
||||
frame->end_lineno != LOCATION_NOT_AVAILABLE ? frame->end_lineno : frame->lineno,
|
||||
frame->column != LOCATION_NOT_AVAILABLE ? frame->column : 0,
|
||||
frame->end_column != LOCATION_NOT_AVAILABLE ? frame->end_column : 0);
|
||||
if (!location) {
|
||||
Py_DECREF(frame_info);
|
||||
goto error;
|
||||
|
|
@ -711,10 +752,24 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader,
|
|||
location = Py_NewRef(Py_None);
|
||||
}
|
||||
|
||||
PyStructSequence_SetItem(frame_info, 0, Py_NewRef(reader->strings[filename_idx]));
|
||||
/* Build opcode object */
|
||||
PyObject *opcode_obj;
|
||||
if (frame->opcode != OPCODE_NONE) {
|
||||
opcode_obj = PyLong_FromLong(frame->opcode);
|
||||
if (!opcode_obj) {
|
||||
Py_DECREF(location);
|
||||
Py_DECREF(frame_info);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
else {
|
||||
opcode_obj = Py_NewRef(Py_None);
|
||||
}
|
||||
|
||||
PyStructSequence_SetItem(frame_info, 0, Py_NewRef(reader->strings[frame->filename_idx]));
|
||||
PyStructSequence_SetItem(frame_info, 1, location);
|
||||
PyStructSequence_SetItem(frame_info, 2, Py_NewRef(reader->strings[funcname_idx]));
|
||||
PyStructSequence_SetItem(frame_info, 3, Py_NewRef(Py_None));
|
||||
PyStructSequence_SetItem(frame_info, 2, Py_NewRef(reader->strings[frame->funcname_idx]));
|
||||
PyStructSequence_SetItem(frame_info, 3, opcode_obj);
|
||||
PyList_SET_ITEM(frame_list, k, frame_info);
|
||||
}
|
||||
|
||||
|
|
@ -1192,7 +1247,7 @@ binary_reader_close(BinaryReader *reader)
|
|||
PyMem_Free(reader->strings);
|
||||
}
|
||||
|
||||
PyMem_Free(reader->frame_data);
|
||||
PyMem_Free(reader->frames);
|
||||
|
||||
if (reader->thread_states) {
|
||||
for (size_t i = 0; i < reader->thread_state_count; i++) {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,16 @@
|
|||
/* File structure sizes */
|
||||
#define FILE_FOOTER_SIZE 32
|
||||
|
||||
/* Helper macro: convert a Python int to int32_t, storing default_val instead
 * when the conversion is not possible.
 *
 *   obj          PyObject* expected to be a Python int (borrowed reference)
 *   var          int32_t lvalue that receives the result
 *   default_val  value stored when obj cannot be represented as int32_t
 *
 * default_val is used in two situations:
 *   - PyLong_AsLong() fails (e.g. obj is not an int, or it overflows a C
 *     long); the pending exception is cleared.
 *   - The value fits in a C long but not in int32_t.  Where long is 64 bits
 *     wide a plain (int32_t) cast would silently truncate it to an
 *     unrelated number, so the range is checked explicitly.
 *
 * PyLong_AsLong() returns -1 on error, which is also a legitimate value, so
 * PyErr_Occurred() is consulted only for that return value. */
#define PYLONG_TO_INT32_OR_DEFAULT(obj, var, default_val) \
    do { \
        long _pylong_tmp = PyLong_AsLong(obj); \
        if (UNLIKELY(_pylong_tmp == -1 && PyErr_Occurred() != NULL)) { \
            PyErr_Clear(); \
            (var) = (default_val); \
        } \
        else if (_pylong_tmp < INT32_MIN || _pylong_tmp > INT32_MAX) { \
            (var) = (default_val); \
        } \
        else { \
            (var) = (int32_t)_pylong_tmp; \
        } \
    } while (0)
|
||||
|
||||
/* ============================================================================
|
||||
* WRITER-SPECIFIC UTILITY HELPERS
|
||||
* ============================================================================ */
|
||||
|
|
@ -311,7 +321,7 @@ static Py_uhash_t
|
|||
frame_key_hash_func(const void *key)
|
||||
{
|
||||
const FrameKey *fk = (const FrameKey *)key;
|
||||
/* FNV-1a style hash combining all three values */
|
||||
/* FNV-1a style hash combining all fields */
|
||||
Py_uhash_t hash = 2166136261u;
|
||||
hash ^= fk->filename_idx;
|
||||
hash *= 16777619u;
|
||||
|
|
@ -319,6 +329,14 @@ frame_key_hash_func(const void *key)
|
|||
hash *= 16777619u;
|
||||
hash ^= (uint32_t)fk->lineno;
|
||||
hash *= 16777619u;
|
||||
hash ^= (uint32_t)fk->end_lineno;
|
||||
hash *= 16777619u;
|
||||
hash ^= (uint32_t)fk->column;
|
||||
hash *= 16777619u;
|
||||
hash ^= (uint32_t)fk->end_column;
|
||||
hash *= 16777619u;
|
||||
hash ^= fk->opcode;
|
||||
hash *= 16777619u;
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
|
@ -329,7 +347,11 @@ frame_key_compare_func(const void *key1, const void *key2)
|
|||
const FrameKey *fk2 = (const FrameKey *)key2;
|
||||
return (fk1->filename_idx == fk2->filename_idx &&
|
||||
fk1->funcname_idx == fk2->funcname_idx &&
|
||||
fk1->lineno == fk2->lineno);
|
||||
fk1->lineno == fk2->lineno &&
|
||||
fk1->end_lineno == fk2->end_lineno &&
|
||||
fk1->column == fk2->column &&
|
||||
fk1->end_column == fk2->end_column &&
|
||||
fk1->opcode == fk2->opcode);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -389,9 +411,12 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
|
|||
|
||||
static inline int
|
||||
writer_intern_frame(BinaryWriter *writer, uint32_t filename_idx, uint32_t funcname_idx,
|
||||
int32_t lineno, uint32_t *index)
|
||||
int32_t lineno, int32_t end_lineno, int32_t column, int32_t end_column,
|
||||
uint8_t opcode, uint32_t *index)
|
||||
{
|
||||
FrameKey lookup_key = {filename_idx, funcname_idx, lineno};
|
||||
FrameKey lookup_key = {
|
||||
filename_idx, funcname_idx, lineno, end_lineno, column, end_column, opcode
|
||||
};
|
||||
|
||||
void *existing = _Py_hashtable_get(writer->frame_hash, &lookup_key);
|
||||
if (existing != NULL) {
|
||||
|
|
@ -416,6 +441,10 @@ writer_intern_frame(BinaryWriter *writer, uint32_t filename_idx, uint32_t funcna
|
|||
fe->filename_idx = filename_idx;
|
||||
fe->funcname_idx = funcname_idx;
|
||||
fe->lineno = lineno;
|
||||
fe->end_lineno = end_lineno;
|
||||
fe->column = column;
|
||||
fe->end_column = end_column;
|
||||
fe->opcode = opcode;
|
||||
|
||||
if (_Py_hashtable_set(writer->frame_hash, key, (void *)(uintptr_t)(*index + 1)) < 0) {
|
||||
PyMem_Free(key);
|
||||
|
|
@ -810,22 +839,49 @@ build_frame_stack(BinaryWriter *writer, PyObject *frame_list,
|
|||
/* Use unchecked accessors since we control the data structures */
|
||||
PyObject *frame_info = PyList_GET_ITEM(frame_list, k);
|
||||
|
||||
/* Get filename, location, funcname from FrameInfo using unchecked access */
|
||||
/* Get filename, location, funcname, opcode from FrameInfo using unchecked access */
|
||||
PyObject *filename = PyStructSequence_GET_ITEM(frame_info, 0);
|
||||
PyObject *location = PyStructSequence_GET_ITEM(frame_info, 1);
|
||||
PyObject *funcname = PyStructSequence_GET_ITEM(frame_info, 2);
|
||||
PyObject *opcode_obj = PyStructSequence_GET_ITEM(frame_info, 3);
|
||||
|
||||
/* Extract location fields (can be None for synthetic frames) */
|
||||
int32_t lineno = LOCATION_NOT_AVAILABLE;
|
||||
int32_t end_lineno = LOCATION_NOT_AVAILABLE;
|
||||
int32_t column = LOCATION_NOT_AVAILABLE;
|
||||
int32_t end_column = LOCATION_NOT_AVAILABLE;
|
||||
|
||||
/* Extract lineno from location (can be None for synthetic frames) */
|
||||
int32_t lineno = 0;
|
||||
if (location != Py_None) {
|
||||
/* Use unchecked access - first element is lineno */
|
||||
/* LocationInfo is a struct sequence or tuple with:
|
||||
* (lineno, end_lineno, col_offset, end_col_offset) */
|
||||
PyObject *lineno_obj = PyTuple_Check(location) ?
|
||||
PyTuple_GET_ITEM(location, 0) :
|
||||
PyStructSequence_GET_ITEM(location, 0);
|
||||
lineno = (int32_t)PyLong_AsLong(lineno_obj);
|
||||
PyObject *end_lineno_obj = PyTuple_Check(location) ?
|
||||
PyTuple_GET_ITEM(location, 1) :
|
||||
PyStructSequence_GET_ITEM(location, 1);
|
||||
PyObject *column_obj = PyTuple_Check(location) ?
|
||||
PyTuple_GET_ITEM(location, 2) :
|
||||
PyStructSequence_GET_ITEM(location, 2);
|
||||
PyObject *end_column_obj = PyTuple_Check(location) ?
|
||||
PyTuple_GET_ITEM(location, 3) :
|
||||
PyStructSequence_GET_ITEM(location, 3);
|
||||
|
||||
PYLONG_TO_INT32_OR_DEFAULT(lineno_obj, lineno, LOCATION_NOT_AVAILABLE);
|
||||
PYLONG_TO_INT32_OR_DEFAULT(end_lineno_obj, end_lineno, LOCATION_NOT_AVAILABLE);
|
||||
PYLONG_TO_INT32_OR_DEFAULT(column_obj, column, LOCATION_NOT_AVAILABLE);
|
||||
PYLONG_TO_INT32_OR_DEFAULT(end_column_obj, end_column, LOCATION_NOT_AVAILABLE);
|
||||
}
|
||||
|
||||
/* Extract opcode (can be None) */
|
||||
uint8_t opcode = OPCODE_NONE;
|
||||
if (opcode_obj != Py_None) {
|
||||
long opcode_long = PyLong_AsLong(opcode_obj);
|
||||
if (UNLIKELY(PyErr_Occurred() != NULL)) {
|
||||
PyErr_Clear();
|
||||
lineno = 0;
|
||||
opcode = OPCODE_NONE;
|
||||
} else if (opcode_long >= 0 && opcode_long <= 254) {
|
||||
opcode = (uint8_t)opcode_long;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -841,9 +897,11 @@ build_frame_stack(BinaryWriter *writer, PyObject *frame_list,
|
|||
return -1;
|
||||
}
|
||||
|
||||
/* Intern frame */
|
||||
/* Intern frame with full location info */
|
||||
uint32_t frame_idx;
|
||||
if (writer_intern_frame(writer, filename_idx, funcname_idx, lineno, &frame_idx) < 0) {
|
||||
if (writer_intern_frame(writer, filename_idx, funcname_idx,
|
||||
lineno, end_lineno, column, end_column,
|
||||
opcode, &frame_idx) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
@ -1038,10 +1096,33 @@ binary_writer_finalize(BinaryWriter *writer)
|
|||
|
||||
for (size_t i = 0; i < writer->frame_count; i++) {
|
||||
FrameEntry *entry = &writer->frame_entries[i];
|
||||
uint8_t buf[30];
|
||||
uint8_t buf[64]; /* Increased buffer for additional fields */
|
||||
size_t pos = encode_varint_u32(buf, entry->filename_idx);
|
||||
pos += encode_varint_u32(buf + pos, entry->funcname_idx);
|
||||
pos += encode_varint_i32(buf + pos, entry->lineno);
|
||||
|
||||
/* Delta encode end_lineno: store (end_lineno - lineno) as zigzag.
|
||||
* When lineno is -1, store delta as 0 (result will be -1). */
|
||||
int32_t end_lineno_delta = 0;
|
||||
if (entry->lineno != LOCATION_NOT_AVAILABLE &&
|
||||
entry->end_lineno != LOCATION_NOT_AVAILABLE) {
|
||||
end_lineno_delta = entry->end_lineno - entry->lineno;
|
||||
}
|
||||
pos += encode_varint_i32(buf + pos, end_lineno_delta);
|
||||
|
||||
pos += encode_varint_i32(buf + pos, entry->column);
|
||||
|
||||
/* Delta encode end_column: store (end_column - column) as zigzag.
|
||||
* When column is -1, store delta as 0 (result will be -1). */
|
||||
int32_t end_column_delta = 0;
|
||||
if (entry->column != LOCATION_NOT_AVAILABLE &&
|
||||
entry->end_column != LOCATION_NOT_AVAILABLE) {
|
||||
end_column_delta = entry->end_column - entry->column;
|
||||
}
|
||||
pos += encode_varint_i32(buf + pos, end_column_delta);
|
||||
|
||||
buf[pos++] = entry->opcode;
|
||||
|
||||
if (fwrite_checked_allow_threads(buf, pos, writer->fp) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -1156,3 +1237,4 @@ binary_writer_destroy(BinaryWriter *writer)
|
|||
PyMem_Free(writer);
|
||||
}
|
||||
|
||||
#undef PYLONG_TO_INT32_OR_DEFAULT
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue