[3.13] GH-127953: Make line number lookup O(1) regardless of the size of the code object (#129127)

GH-127953: Make line number lookup O(1) regardless of the size of the code object (GH-128350)
This commit is contained in:
Mark Shannon 2025-04-07 19:15:02 +01:00 committed by GitHub
parent 1fcf409ace
commit 3f3863281b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 208 additions and 156 deletions

View file

@ -33,11 +33,12 @@ typedef struct {
} _PyCoCached; } _PyCoCached;
/* Ancillary data structure used for instrumentation. /* Ancillary data structure used for instrumentation.
Line instrumentation creates an array of Line instrumentation creates this with sufficient
these. One entry per code unit.*/ space for one entry per code unit. The total size
of the data will be `bytes_per_entry * Py_SIZE(code)` */
typedef struct { typedef struct {
uint8_t original_opcode; uint8_t bytes_per_entry;
int8_t line_delta; uint8_t data[1];
} _PyCoLineInstrumentationData; } _PyCoLineInstrumentationData;

View file

@ -0,0 +1,2 @@
The time to handle a ``LINE`` event in sys.monitoring (and sys.settrace) is
now independent of the number of lines in the code object.

View file

@ -987,6 +987,9 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq)
if (addrq < 0) { if (addrq < 0) {
return co->co_firstlineno; return co->co_firstlineno;
} }
if (co->_co_monitoring && co->_co_monitoring->lines) {
return _Py_Instrumentation_GetLine(co, addrq/sizeof(_Py_CODEUNIT));
}
assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); assert(addrq >= 0 && addrq < _PyCode_NBYTES(co));
PyCodeAddressRange bounds; PyCodeAddressRange bounds;
_PyCode_InitAddressRange(co, &bounds); _PyCode_InitAddressRange(co, &bounds);

View file

@ -804,8 +804,10 @@ resume_frame:
int original_opcode = 0; int original_opcode = 0;
if (tstate->tracing) { if (tstate->tracing) {
PyCodeObject *code = _PyFrame_GetCode(frame); PyCodeObject *code = _PyFrame_GetCode(frame);
original_opcode = code->_co_monitoring->lines[(int)(here - _PyCode_CODE(code))].original_opcode; int index = (int)(here - _PyCode_CODE(code));
} else { original_opcode = code->_co_monitoring->lines->data[index*code->_co_monitoring->lines->bytes_per_entry];
}
else {
_PyFrame_SetStackPointer(frame, stack_pointer); _PyFrame_SetStackPointer(frame, stack_pointer);
original_opcode = _Py_call_instrumentation_line( original_opcode = _Py_call_instrumentation_line(
tstate, frame, here, prev); tstate, frame, here, prev);

View file

@ -264,48 +264,42 @@ get_events(_Py_GlobalMonitors *m, int tool_id)
return result; return result;
} }
/* Line delta. /* Module code can have line 0, even though modules start at line 1,
* 8 bit value. * so -1 is a legal delta. */
* if line_delta == -128: #define NO_LINE (-2)
* line = None # represented as -1
* elif line_delta == -127 or line_delta == -126: /* Returns the line delta. Defined as:
* line = PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); * if line is None:
* line_delta = NO_LINE
* else: * else:
* line = first_line + (offset >> OFFSET_SHIFT) + line_delta; * line_delta = line - first_line
*/ */
static int
#define NO_LINE -128 compute_line_delta(PyCodeObject *code, int line)
#define COMPUTED_LINE_LINENO_CHANGE -127
#define COMPUTED_LINE -126
#define OFFSET_SHIFT 4
static int8_t
compute_line_delta(PyCodeObject *code, int offset, int line)
{ {
if (line < 0) { if (line < 0) {
assert(line == -1);
return NO_LINE; return NO_LINE;
} }
int delta = line - code->co_firstlineno - (offset >> OFFSET_SHIFT); int delta = line - code->co_firstlineno;
if (delta <= INT8_MAX && delta > COMPUTED_LINE) { assert(delta > NO_LINE);
return delta; return delta;
}
return COMPUTED_LINE;
} }
static int static int
compute_line(PyCodeObject *code, int offset, int8_t line_delta) compute_line(PyCodeObject *code, int line_delta)
{ {
if (line_delta > COMPUTED_LINE) {
return code->co_firstlineno + (offset >> OFFSET_SHIFT) + line_delta;
}
if (line_delta == NO_LINE) { if (line_delta == NO_LINE) {
return -1; return -1;
} }
assert(line_delta == COMPUTED_LINE || line_delta == COMPUTED_LINE_LINENO_CHANGE); assert(line_delta > NO_LINE);
/* Look it up */ return code->co_firstlineno + line_delta;
return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); }
static inline uint8_t
get_original_opcode(_PyCoLineInstrumentationData *line_data, int index)
{
return line_data->data[index*line_data->bytes_per_entry];
} }
int int
@ -317,7 +311,7 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset)
assert(opcode != 0); assert(opcode != 0);
assert(opcode != RESERVED); assert(opcode != RESERVED);
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
opcode = code->_co_monitoring->lines[offset].original_opcode; opcode = get_original_opcode(code->_co_monitoring->lines, offset);
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
opcode = code->_co_monitoring->per_instruction_opcodes[offset]; opcode = code->_co_monitoring->per_instruction_opcodes[offset];
@ -342,6 +336,51 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset)
return 1 + _PyOpcode_Caches[opcode]; return 1 + _PyOpcode_Caches[opcode];
} }
/* Return a pointer to the original-opcode byte of the entry for code unit
 * `index`. Entries are `bytes_per_entry` bytes wide and the opcode is the
 * first byte of each entry. */
static inline uint8_t *
get_original_opcode_ptr(_PyCoLineInstrumentationData *line_data, int index)
{
    int entry_start = index*line_data->bytes_per_entry;
    return line_data->data + entry_start;
}
/* Store `opcode` in the original-opcode byte (the first byte) of the entry
 * for code unit `index`. */
static inline void
set_original_opcode(_PyCoLineInstrumentationData *line_data, int index, uint8_t opcode)
{
    int entry_start = index*line_data->bytes_per_entry;
    line_data->data[entry_start] = opcode;
}
/* Read the line delta stored in the entry for code unit `index`.
 *
 * The delta occupies the bytes following the opcode byte of the entry,
 * least significant byte first, and is stored offset by -NO_LINE so that
 * a stored value of zero means NO_LINE. */
static inline int
get_line_delta(_PyCoLineInstrumentationData *line_data, int index)
{
    uint8_t *delta_bytes = &line_data->data[index*line_data->bytes_per_entry+1];
    assert(line_data->bytes_per_entry >= 2);
    const int delta_width = line_data->bytes_per_entry - 1;
    uint32_t stored = delta_bytes[0];
    /* Fold in any remaining bytes, each one 8 bits more significant. */
    for (int n = 1; n < delta_width; n++) {
        stored |= ((uint32_t)delta_bytes[n]) << (n*8);
    }
    assert(stored < INT_MAX);
    /* Undo the bias applied when the delta was stored. */
    return ((int)stored) + NO_LINE;
}
/* Write `line_delta` into the entry for code unit `index`.
 *
 * The value is biased by -NO_LINE before storing (so NO_LINE itself is
 * stored as zero) and written after the opcode byte of the entry, least
 * significant byte first. The biased value must fit in the
 * `bytes_per_entry - 1` bytes available. */
static inline void
set_line_delta(_PyCoLineInstrumentationData *line_data, int index, int line_delta)
{
    assert(line_delta >= NO_LINE);
    uint32_t remaining = line_delta - NO_LINE;
    uint8_t *delta_bytes = &line_data->data[index*line_data->bytes_per_entry+1];
    assert(remaining < (1ULL << ((line_data->bytes_per_entry-1)*8)));
    assert(line_data->bytes_per_entry >= 2);
    const int delta_width = line_data->bytes_per_entry - 1;
    delta_bytes[0] = remaining & 0xff;
    /* Emit the higher-order bytes, peeling 8 bits off per step. */
    for (int n = 1; n < delta_width; n++) {
        remaining >>= 8;
        delta_bytes[n] = remaining & 0xff;
    }
}
#ifdef INSTRUMENT_DEBUG #ifdef INSTRUMENT_DEBUG
static void static void
@ -361,11 +400,15 @@ dump_instrumentation_data_lines(PyCodeObject *code, _PyCoLineInstrumentationData
if (lines == NULL) { if (lines == NULL) {
fprintf(out, ", lines = NULL"); fprintf(out, ", lines = NULL");
} }
else if (lines[i].original_opcode == 0) {
fprintf(out, ", lines = {original_opcode = No LINE (0), line_delta = %d)", lines[i].line_delta);
}
else { else {
fprintf(out, ", lines = {original_opcode = %s, line_delta = %d)", _PyOpcode_OpName[lines[i].original_opcode], lines[i].line_delta); int opcode = get_original_opcode(lines, i);
int line_delta = get_line_delta(lines, i);
if (opcode == 0) {
fprintf(out, ", lines = {original_opcode = No LINE (0), line_delta = %d)", line_delta);
}
else {
fprintf(out, ", lines = {original_opcode = %s, line_delta = %d)", _PyOpcode_OpName[opcode], line_delta);
}
} }
} }
@ -415,6 +458,12 @@ dump_local_monitors(const char *prefix, _Py_LocalMonitors monitors, FILE*out)
} }
} }
/** NOTE:
* Do not use PyCode_Addr2Line to determine the line number in instrumentation,
* as `PyCode_Addr2Line` uses the monitoring data if it is available.
*/
/* No error checking -- Don't use this for anything but experimental debugging */ /* No error checking -- Don't use this for anything but experimental debugging */
static void static void
dump_instrumentation_data(PyCodeObject *code, int star, FILE*out) dump_instrumentation_data(PyCodeObject *code, int star, FILE*out)
@ -432,6 +481,8 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out)
dump_local_monitors("Active", data->active_monitors, out); dump_local_monitors("Active", data->active_monitors, out);
int code_len = (int)Py_SIZE(code); int code_len = (int)Py_SIZE(code);
bool starred = false; bool starred = false;
PyCodeAddressRange range;
_PyCode_InitAddressRange(code, &range);
for (int i = 0; i < code_len; i += _PyInstruction_GetLength(code, i)) { for (int i = 0; i < code_len; i += _PyInstruction_GetLength(code, i)) {
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
int opcode = instr->op.code; int opcode = instr->op.code;
@ -439,7 +490,7 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out)
fprintf(out, "** "); fprintf(out, "** ");
starred = true; starred = true;
} }
fprintf(out, "Offset: %d, line: %d %s: ", i, PyCode_Addr2Line(code, i*2), _PyOpcode_OpName[opcode]); fprintf(out, "Offset: %d, line: %d %s: ", i, _PyCode_CheckLineNumber(i*2, &range), _PyOpcode_OpName[opcode]);
dump_instrumentation_data_tools(code, data->tools, i, out); dump_instrumentation_data_tools(code, data->tools, i, out);
dump_instrumentation_data_lines(code, data->lines, i, out); dump_instrumentation_data_lines(code, data->lines, i, out);
dump_instrumentation_data_line_tools(code, data->line_tools, i, out); dump_instrumentation_data_line_tools(code, data->line_tools, i, out);
@ -504,10 +555,12 @@ sanity_check_instrumentation(PyCodeObject *code)
code->_co_monitoring->active_monitors, code->_co_monitoring->active_monitors,
active_monitors)); active_monitors));
int code_len = (int)Py_SIZE(code); int code_len = (int)Py_SIZE(code);
PyCodeAddressRange range;
/* Initialise the address range from the code object under test; this
 * function's parameter is named `code`. */
_PyCode_InitAddressRange(code, &range);
for (int i = 0; i < code_len;) { for (int i = 0; i < code_len;) {
_Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i];
int opcode = instr->op.code; int opcode = instr->op.code;
int base_opcode = _Py_GetBaseOpcode(code, i); int base_opcode = _Py_GetBaseCodeUnit(code, i).op.code;
CHECK(valid_opcode(opcode)); CHECK(valid_opcode(opcode));
CHECK(valid_opcode(base_opcode)); CHECK(valid_opcode(base_opcode));
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
@ -518,8 +571,8 @@ sanity_check_instrumentation(PyCodeObject *code)
} }
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
CHECK(data->lines); CHECK(data->lines);
CHECK(valid_opcode(data->lines[i].original_opcode)); opcode = get_original_opcode(data->lines, i);
opcode = data->lines[i].original_opcode; CHECK(valid_opcode(opcode));
CHECK(opcode != END_FOR); CHECK(opcode != END_FOR);
CHECK(opcode != RESUME); CHECK(opcode != RESUME);
CHECK(opcode != RESUME_CHECK); CHECK(opcode != RESUME_CHECK);
@ -534,7 +587,7 @@ sanity_check_instrumentation(PyCodeObject *code)
* *and* we are executing a INSTRUMENTED_LINE instruction * *and* we are executing a INSTRUMENTED_LINE instruction
* that has de-instrumented itself, then we will execute * that has de-instrumented itself, then we will execute
* an invalid INSTRUMENTED_INSTRUCTION */ * an invalid INSTRUMENTED_INSTRUCTION */
CHECK(data->lines[i].original_opcode != INSTRUMENTED_INSTRUCTION); CHECK(get_original_opcode(data->lines, i) != INSTRUMENTED_INSTRUCTION);
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
CHECK(data->per_instruction_opcodes[i] != 0); CHECK(data->per_instruction_opcodes[i] != 0);
@ -549,9 +602,9 @@ sanity_check_instrumentation(PyCodeObject *code)
} }
CHECK(active_monitors.tools[event] != 0); CHECK(active_monitors.tools[event] != 0);
} }
if (data->lines && base_opcode != END_FOR) { if (data->lines && get_original_opcode(data->lines, i)) {
int line1 = compute_line(code, i, data->lines[i].line_delta); int line1 = compute_line(code, get_line_delta(data->lines, i));
int line2 = PyCode_Addr2Line(code, i*sizeof(_Py_CODEUNIT)); int line2 = _PyCode_CheckLineNumber(i*sizeof(_Py_CODEUNIT), &range);
CHECK(line1 == line2); CHECK(line1 == line2);
} }
CHECK(valid_opcode(opcode)); CHECK(valid_opcode(opcode));
@ -584,7 +637,7 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i)
{ {
int opcode = _PyCode_CODE(code)[i].op.code; int opcode = _PyCode_CODE(code)[i].op.code;
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
opcode = code->_co_monitoring->lines[i].original_opcode; opcode = get_original_opcode(code->_co_monitoring->lines, i);
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
opcode = code->_co_monitoring->per_instruction_opcodes[i]; opcode = code->_co_monitoring->per_instruction_opcodes[i];
@ -609,7 +662,7 @@ de_instrument(PyCodeObject *code, int i, int event)
int opcode = *opcode_ptr; int opcode = *opcode_ptr;
assert(opcode != ENTER_EXECUTOR); assert(opcode != ENTER_EXECUTOR);
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode; opcode_ptr = get_original_opcode_ptr(code->_co_monitoring->lines, i);
opcode = *opcode_ptr; opcode = *opcode_ptr;
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
@ -636,10 +689,10 @@ de_instrument_line(PyCodeObject *code, int i)
if (opcode != INSTRUMENTED_LINE) { if (opcode != INSTRUMENTED_LINE) {
return; return;
} }
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; _PyCoLineInstrumentationData *lines = code->_co_monitoring->lines;
int original_opcode = lines->original_opcode; int original_opcode = get_original_opcode(lines, i);
if (original_opcode == INSTRUMENTED_INSTRUCTION) { if (original_opcode == INSTRUMENTED_INSTRUCTION) {
lines->original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; set_original_opcode(lines, i, code->_co_monitoring->per_instruction_opcodes[i]);
} }
CHECK(original_opcode != 0); CHECK(original_opcode != 0);
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
@ -657,7 +710,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i)
uint8_t *opcode_ptr = &instr->op.code; uint8_t *opcode_ptr = &instr->op.code;
int opcode = *opcode_ptr; int opcode = *opcode_ptr;
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode; opcode_ptr = get_original_opcode_ptr(code->_co_monitoring->lines, i);
opcode = *opcode_ptr; opcode = *opcode_ptr;
} }
if (opcode != INSTRUMENTED_INSTRUCTION) { if (opcode != INSTRUMENTED_INSTRUCTION) {
@ -682,8 +735,7 @@ instrument(PyCodeObject *code, int i)
uint8_t *opcode_ptr = &instr->op.code; uint8_t *opcode_ptr = &instr->op.code;
int opcode =*opcode_ptr; int opcode =*opcode_ptr;
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; opcode_ptr = get_original_opcode_ptr(code->_co_monitoring->lines, i);
opcode_ptr = &lines->original_opcode;
opcode = *opcode_ptr; opcode = *opcode_ptr;
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
@ -714,9 +766,8 @@ instrument_line(PyCodeObject *code, int i)
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
return; return;
} }
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; set_original_opcode(code->_co_monitoring->lines, i, _PyOpcode_Deopt[opcode]);
lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(get_line_delta(code->_co_monitoring->lines, i) > NO_LINE);
CHECK(lines->original_opcode > 0);
*opcode_ptr = INSTRUMENTED_LINE; *opcode_ptr = INSTRUMENTED_LINE;
} }
@ -727,8 +778,7 @@ instrument_per_instruction(PyCodeObject *code, int i)
uint8_t *opcode_ptr = &instr->op.code; uint8_t *opcode_ptr = &instr->op.code;
int opcode = *opcode_ptr; int opcode = *opcode_ptr;
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
_PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; opcode_ptr = get_original_opcode_ptr(code->_co_monitoring->lines, i);
opcode_ptr = &lines->original_opcode;
opcode = *opcode_ptr; opcode = *opcode_ptr;
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
@ -1204,18 +1254,16 @@ _Py_call_instrumentation_exc2(
call_instrumentation_vector_protected(tstate, event, frame, instr, 4, args); call_instrumentation_vector_protected(tstate, event, frame, instr, 4, args);
} }
int int
_Py_Instrumentation_GetLine(PyCodeObject *code, int index) _Py_Instrumentation_GetLine(PyCodeObject *code, int index)
{ {
_PyCoMonitoringData *monitoring = code->_co_monitoring; _PyCoMonitoringData *monitoring = code->_co_monitoring;
assert(monitoring != NULL); assert(monitoring != NULL);
assert(monitoring->lines != NULL); assert(monitoring->lines != NULL);
assert(index >= code->_co_firsttraceable);
assert(index < Py_SIZE(code)); assert(index < Py_SIZE(code));
_PyCoLineInstrumentationData *line_data = &monitoring->lines[index]; _PyCoLineInstrumentationData *line_data = monitoring->lines;
int8_t line_delta = line_data->line_delta; int line_delta = get_line_delta(line_data, index);
int line = compute_line(code, index, line_delta); int line = compute_line(code, line_delta);
return line; return line;
} }
@ -1228,29 +1276,20 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
int i = (int)(instr - _PyCode_CODE(code)); int i = (int)(instr - _PyCode_CODE(code));
_PyCoMonitoringData *monitoring = code->_co_monitoring; _PyCoMonitoringData *monitoring = code->_co_monitoring;
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; _PyCoLineInstrumentationData *line_data = monitoring->lines;
PyInterpreterState *interp = tstate->interp; PyInterpreterState *interp = tstate->interp;
int8_t line_delta = line_data->line_delta; int line = _Py_Instrumentation_GetLine(code, i);
int line = 0; assert(line >= 0);
assert(prev != NULL);
if (line_delta == COMPUTED_LINE_LINENO_CHANGE) { int prev_index = (int)(prev - _PyCode_CODE(code));
// We know the line number must have changed, don't need to calculate int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
// the line number for now because we might not need it. if (prev_line == line) {
line = -1; int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
} else { /* RESUME and INSTRUMENTED_RESUME are needed for the operation of
line = compute_line(code, i, line_delta); * instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
assert(line >= 0); */
assert(prev != NULL); if (prev_opcode != RESUME && prev_opcode != INSTRUMENTED_RESUME) {
int prev_index = (int)(prev - _PyCode_CODE(code)); goto done;
int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
if (prev_line == line) {
int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
/* RESUME and INSTRUMENTED_RESUME are needed for the operation of
* instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
*/
if (prev_opcode != RESUME && prev_opcode != INSTRUMENTED_RESUME) {
goto done;
}
} }
} }
@ -1275,12 +1314,6 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
tstate->tracing++; tstate->tracing++;
/* Call c_tracefunc directly, having set the line number. */ /* Call c_tracefunc directly, having set the line number. */
Py_INCREF(frame_obj); Py_INCREF(frame_obj);
if (line == -1 && line_delta > COMPUTED_LINE) {
/* Only assign f_lineno if it's easy to calculate, otherwise
* do lazy calculation by setting the f_lineno to 0.
*/
line = compute_line(code, i, line_delta);
}
frame_obj->f_lineno = line; frame_obj->f_lineno = line;
int err = tstate->c_tracefunc(tstate->c_traceobj, frame_obj, PyTrace_LINE, Py_None); int err = tstate->c_tracefunc(tstate->c_traceobj, frame_obj, PyTrace_LINE, Py_None);
frame_obj->f_lineno = 0; frame_obj->f_lineno = 0;
@ -1297,11 +1330,6 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
if (tools == 0) { if (tools == 0) {
goto done; goto done;
} }
if (line == -1) {
/* Need to calculate the line number now for monitoring events */
line = compute_line(code, i, line_delta);
}
PyObject *line_obj = PyLong_FromLong(line); PyObject *line_obj = PyLong_FromLong(line);
if (line_obj == NULL) { if (line_obj == NULL) {
return -1; return -1;
@ -1333,7 +1361,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
Py_DECREF(line_obj); Py_DECREF(line_obj);
uint8_t original_opcode; uint8_t original_opcode;
done: done:
original_opcode = line_data->original_opcode; original_opcode = get_original_opcode(line_data, i);
assert(original_opcode != 0); assert(original_opcode != 0);
assert(original_opcode != INSTRUMENTED_LINE); assert(original_opcode != INSTRUMENTED_LINE);
assert(_PyOpcode_Deopt[original_opcode] == original_opcode); assert(_PyOpcode_Deopt[original_opcode] == original_opcode);
@ -1419,7 +1447,7 @@ initialize_tools(PyCodeObject *code)
int opcode = instr->op.code; int opcode = instr->op.code;
assert(opcode != ENTER_EXECUTOR); assert(opcode != ENTER_EXECUTOR);
if (opcode == INSTRUMENTED_LINE) { if (opcode == INSTRUMENTED_LINE) {
opcode = code->_co_monitoring->lines[i].original_opcode; opcode = get_original_opcode(code->_co_monitoring->lines, i);
} }
if (opcode == INSTRUMENTED_INSTRUCTION) { if (opcode == INSTRUMENTED_INSTRUCTION) {
opcode = code->_co_monitoring->per_instruction_opcodes[i]; opcode = code->_co_monitoring->per_instruction_opcodes[i];
@ -1462,63 +1490,57 @@ initialize_tools(PyCodeObject *code)
} }
} }
#define NO_LINE -128
static void static void
initialize_lines(PyCodeObject *code) initialize_lines(PyCodeObject *code, int bytes_per_entry)
{ {
ASSERT_WORLD_STOPPED_OR_LOCKED(code); ASSERT_WORLD_STOPPED_OR_LOCKED(code);
_PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines;
assert(line_data != NULL); assert(line_data != NULL);
line_data->bytes_per_entry = bytes_per_entry;
int code_len = (int)Py_SIZE(code); int code_len = (int)Py_SIZE(code);
PyCodeAddressRange range; PyCodeAddressRange range;
_PyCode_InitAddressRange(code, &range); _PyCode_InitAddressRange(code, &range);
for (int i = 0; i < code->_co_firsttraceable && i < code_len; i++) {
line_data[i].original_opcode = 0;
line_data[i].line_delta = -127;
}
int current_line = -1; int current_line = -1;
for (int i = code->_co_firsttraceable; i < code_len; ) { for (int i = 0; i < code_len; ) {
int opcode = _Py_GetBaseOpcode(code, i); int opcode = _Py_GetBaseOpcode(code, i);
int line = _PyCode_CheckLineNumber(i*(int)sizeof(_Py_CODEUNIT), &range); int line = _PyCode_CheckLineNumber(i*(int)sizeof(_Py_CODEUNIT), &range);
line_data[i].line_delta = compute_line_delta(code, i, line); set_line_delta(line_data, i, compute_line_delta(code, line));
int length = _PyInstruction_GetLength(code, i); int length = _PyInstruction_GetLength(code, i);
switch (opcode) { if (i < code->_co_firsttraceable) {
case END_ASYNC_FOR: set_original_opcode(line_data, i, 0);
case END_FOR: }
case END_SEND: else {
case RESUME: switch (opcode) {
/* END_FOR cannot start a line, as it is skipped by FOR_ITER case END_ASYNC_FOR:
* END_SEND cannot start a line, as it is skipped by SEND case END_FOR:
* RESUME must not be instrumented with INSTRUMENT_LINE */ case END_SEND:
line_data[i].original_opcode = 0; case RESUME:
break; /* END_FOR cannot start a line, as it is skipped by FOR_ITER
default: * END_SEND cannot start a line, as it is skipped by SEND
/* Set original_opcode to the opcode iff the instruction * RESUME and POP_ITER must not be instrumented with INSTRUMENTED_LINE */
* starts a line, and thus should be instrumented. set_original_opcode(line_data, i, 0);
* This saves having to perform this check every time the break;
* we turn instrumentation on or off, and serves as a sanity default:
* check when debugging. /* Set original_opcode to the opcode iff the instruction
*/ * starts a line, and thus should be instrumented.
if (line != current_line && line >= 0) { * This saves having to perform this check every time the
line_data[i].original_opcode = opcode; * we turn instrumentation on or off, and serves as a sanity
if (line_data[i].line_delta == COMPUTED_LINE) { * check when debugging.
/* Label this line as a line with a line number change */
* which could help the monitoring callback to quickly if (line != current_line && line >= 0) {
* identify the line number change. set_original_opcode(line_data, i, opcode);
*/ CHECK(get_line_delta(line_data, i) != NO_LINE);
line_data[i].line_delta = COMPUTED_LINE_LINENO_CHANGE;
} }
} else {
else { set_original_opcode(line_data, i, 0);
line_data[i].original_opcode = 0; }
} current_line = line;
current_line = line; }
} }
for (int j = 1; j < length; j++) { for (int j = 1; j < length; j++) {
line_data[i+j].original_opcode = 0; set_original_opcode(line_data, i+j, 0);
line_data[i+j].line_delta = NO_LINE; set_line_delta(line_data, i+j, NO_LINE);
} }
i += length; i += length;
} }
@ -1560,13 +1582,9 @@ initialize_lines(PyCodeObject *code)
continue; continue;
} }
assert(target >= 0); assert(target >= 0);
if (line_data[target].line_delta != NO_LINE) { if (get_line_delta(line_data, target) != NO_LINE) {
line_data[target].original_opcode = _Py_GetBaseOpcode(code, target); int opcode = _Py_GetBaseOpcode(code, target);
if (line_data[target].line_delta == COMPUTED_LINE_LINENO_CHANGE) { set_original_opcode(line_data, target, opcode);
// If the line is a jump target, we are not sure if the line
// number changes, so we set it to COMPUTED_LINE.
line_data[target].line_delta = COMPUTED_LINE;
}
} }
} }
/* Scan exception table */ /* Scan exception table */
@ -1588,9 +1606,8 @@ initialize_lines(PyCodeObject *code)
* END_ASYNC_FOR is a bit special as it marks the end of * END_ASYNC_FOR is a bit special as it marks the end of
* an `async for` loop, which should not generate its own * an `async for` loop, which should not generate its own
* line event. */ * line event. */
if (line_data[handler].line_delta != NO_LINE && if (get_line_delta(line_data, handler) != NO_LINE && original_opcode != END_ASYNC_FOR) {
original_opcode != END_ASYNC_FOR) { set_original_opcode(line_data, handler, original_opcode);
line_data[handler].original_opcode = original_opcode;
} }
} }
} }
@ -1653,12 +1670,39 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp)
} }
if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (all_events.tools[PY_MONITORING_EVENT_LINE]) {
if (code->_co_monitoring->lines == NULL) { if (code->_co_monitoring->lines == NULL) {
code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); PyCodeAddressRange range;
/* Scan the traceable instructions for the largest line number; the
 * result is used below to choose how many bytes each entry needs.
 * The range only needs to be initialised once before the scan. */
_PyCode_InitAddressRange(code, &range);
int max_line = code->co_firstlineno + 1;
for (int i = code->_co_firsttraceable; i < code_len; ) {
    int line = _PyCode_CheckLineNumber(i*(int)sizeof(_Py_CODEUNIT), &range);
    if (line > max_line) {
        max_line = line;
    }
    int length = _PyInstruction_GetLength(code, i);
    i += length;
}
int bytes_per_entry;
int max_delta = max_line - code->co_firstlineno;
/* We store delta+2 in the table, so 253 is max for one byte */
if (max_delta < 256+NO_LINE) {
bytes_per_entry = 2;
}
else if (max_delta < (1 << 16)+NO_LINE) {
bytes_per_entry = 3;
}
else if (max_delta < (1 << 24)+NO_LINE) {
bytes_per_entry = 4;
}
else {
bytes_per_entry = 5;
}
code->_co_monitoring->lines = PyMem_Malloc(1 + code_len * bytes_per_entry);
if (code->_co_monitoring->lines == NULL) { if (code->_co_monitoring->lines == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
initialize_lines(code); initialize_lines(code, bytes_per_entry);
} }
if (multitools && code->_co_monitoring->line_tools == NULL) { if (multitools && code->_co_monitoring->line_tools == NULL) {
code->_co_monitoring->line_tools = PyMem_Malloc(code_len); code->_co_monitoring->line_tools = PyMem_Malloc(code_len);
@ -1774,7 +1818,7 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
if (removed_line_tools) { if (removed_line_tools) {
_PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines;
for (int i = code->_co_firsttraceable; i < code_len;) { for (int i = code->_co_firsttraceable; i < code_len;) {
if (line_data[i].original_opcode) { if (get_original_opcode(line_data, i)) {
remove_line_tools(code, i, removed_line_tools); remove_line_tools(code, i, removed_line_tools);
} }
i += _PyInstruction_GetLength(code, i); i += _PyInstruction_GetLength(code, i);
@ -1801,7 +1845,7 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
if (new_line_tools) { if (new_line_tools) {
_PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines;
for (int i = code->_co_firsttraceable; i < code_len;) { for (int i = code->_co_firsttraceable; i < code_len;) {
if (line_data[i].original_opcode) { if (get_original_opcode(line_data, i)) {
add_line_tools(code, i, new_line_tools); add_line_tools(code, i, new_line_tools);
} }
i += _PyInstruction_GetLength(code, i); i += _PyInstruction_GetLength(code, i);