GH-103082: Filter LINE events in VM, to simplify tool implementation. (GH-104387)

When monitoring LINE events, instrument all instructions that can have a predecessor on a different line.
Then check that the a new line has been hit in the instrumentation code.
This brings the behavior closer to that of 3.11, simplifying implementation and porting of tools.
This commit is contained in:
Mark Shannon 2023-05-12 12:21:20 +01:00 committed by GitHub
parent 19ee53d52e
commit 45f5aa8fc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 255 additions and 161 deletions

View file

@ -3288,28 +3288,6 @@ dummy_func(
assert(oparg >= 2);
}
inst(INSTRUMENTED_LINE, ( -- )) {
_Py_CODEUNIT *here = next_instr-1;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
tstate, frame, here);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (original_opcode < 0) {
next_instr = here+1;
goto error;
}
next_instr = frame->prev_instr;
if (next_instr != here) {
DISPATCH();
}
if (_PyOpcode_Caches[original_opcode]) {
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
INCREMENT_ADAPTIVE_COUNTER(cache->counter);
}
opcode = original_opcode;
DISPATCH_GOTO();
}
inst(INSTRUMENTED_INSTRUCTION, ( -- )) {
int next_opcode = _Py_call_instrumentation_instruction(
tstate, frame, next_instr-1);

View file

@ -775,6 +775,41 @@ handle_eval_breaker:
#include "generated_cases.c.h"
/* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c,
* because it needs to capture frame->prev_instr before it is updated,
* as happens in the standard instruction prologue.
*/
#if USE_COMPUTED_GOTOS
TARGET_INSTRUMENTED_LINE:
#else
case INSTRUMENTED_LINE:
#endif
{
_Py_CODEUNIT *prev = frame->prev_instr;
_Py_CODEUNIT *here = frame->prev_instr = next_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
tstate, frame, here, prev);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (original_opcode < 0) {
next_instr = here+1;
goto error;
}
next_instr = frame->prev_instr;
if (next_instr != here) {
DISPATCH();
}
if (_PyOpcode_Caches[original_opcode]) {
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
/* Prevent the underlying instruction from specializing
* and overwriting the instrumentation. */
INCREMENT_ADAPTIVE_COUNTER(cache->counter);
}
opcode = original_opcode;
DISPATCH_GOTO();
}
#if USE_COMPUTED_GOTOS
_unknown_opcode:
#else

View file

@ -334,11 +334,10 @@ do { \
#define INSTRUMENTED_JUMP(src, dest, event) \
do { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
int err = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
next_instr = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (err) { \
if (next_instr == NULL) { \
next_instr = (dest)+1; \
goto error; \
} \
next_instr = frame->prev_instr; \
} while (0);

View file

@ -4568,32 +4568,8 @@
DISPATCH();
}
TARGET(INSTRUMENTED_LINE) {
#line 3292 "Python/bytecodes.c"
_Py_CODEUNIT *here = next_instr-1;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
tstate, frame, here);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (original_opcode < 0) {
next_instr = here+1;
goto error;
}
next_instr = frame->prev_instr;
if (next_instr != here) {
DISPATCH();
}
if (_PyOpcode_Caches[original_opcode]) {
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
INCREMENT_ADAPTIVE_COUNTER(cache->counter);
}
opcode = original_opcode;
DISPATCH_GOTO();
#line 4593 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_INSTRUCTION) {
#line 3314 "Python/bytecodes.c"
#line 3292 "Python/bytecodes.c"
int next_opcode = _Py_call_instrumentation_instruction(
tstate, frame, next_instr-1);
if (next_opcode < 0) goto error;
@ -4605,26 +4581,26 @@
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
DISPATCH_GOTO();
#line 4609 "Python/generated_cases.c.h"
#line 4585 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_JUMP_FORWARD) {
#line 3328 "Python/bytecodes.c"
#line 3306 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP);
#line 4615 "Python/generated_cases.c.h"
#line 4591 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_JUMP_BACKWARD) {
#line 3332 "Python/bytecodes.c"
#line 3310 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP);
#line 4622 "Python/generated_cases.c.h"
#line 4598 "Python/generated_cases.c.h"
CHECK_EVAL_BREAKER();
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) {
#line 3337 "Python/bytecodes.c"
#line 3315 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@ -4633,12 +4609,12 @@
assert(err == 0 || err == 1);
int offset = err*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4637 "Python/generated_cases.c.h"
#line 4613 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) {
#line 3348 "Python/bytecodes.c"
#line 3326 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@ -4647,12 +4623,12 @@
assert(err == 0 || err == 1);
int offset = (1-err)*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4651 "Python/generated_cases.c.h"
#line 4627 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) {
#line 3359 "Python/bytecodes.c"
#line 3337 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@ -4665,12 +4641,12 @@
offset = 0;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4669 "Python/generated_cases.c.h"
#line 4645 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) {
#line 3374 "Python/bytecodes.c"
#line 3352 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@ -4683,30 +4659,30 @@
offset = oparg;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4687 "Python/generated_cases.c.h"
#line 4663 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(EXTENDED_ARG) {
#line 3389 "Python/bytecodes.c"
#line 3367 "Python/bytecodes.c"
assert(oparg);
opcode = next_instr->op.code;
oparg = oparg << 8 | next_instr->op.arg;
PRE_DISPATCH_GOTO();
DISPATCH_GOTO();
#line 4698 "Python/generated_cases.c.h"
#line 4674 "Python/generated_cases.c.h"
}
TARGET(CACHE) {
#line 3397 "Python/bytecodes.c"
#line 3375 "Python/bytecodes.c"
assert(0 && "Executing a cache.");
Py_UNREACHABLE();
#line 4705 "Python/generated_cases.c.h"
#line 4681 "Python/generated_cases.c.h"
}
TARGET(RESERVED) {
#line 3402 "Python/bytecodes.c"
#line 3380 "Python/bytecodes.c"
assert(0 && "Executing RESERVED instruction.");
Py_UNREACHABLE();
#line 4712 "Python/generated_cases.c.h"
#line 4688 "Python/generated_cases.c.h"
}

View file

@ -14,7 +14,7 @@
/* Uncomment this to dump debugging output when assertions fail */
// #define INSTRUMENT_DEBUG 1
static PyObject DISABLE =
PyObject _PyInstrumentation_DISABLE =
{
.ob_refcnt = _Py_IMMORTAL_REFCNT,
.ob_type = &PyBaseObject_Type
@ -859,7 +859,7 @@ call_one_instrument(
return -1;
}
Py_DECREF(res);
return (res == &DISABLE);
return (res == &_PyInstrumentation_DISABLE);
}
static const int8_t MOST_SIGNIFICANT_BITS[16] = {
@ -1002,7 +1002,7 @@ _Py_call_instrumentation_2args(
return call_instrumentation_vector(tstate, event, frame, instr, 4, args);
}
int
_Py_CODEUNIT *
_Py_call_instrumentation_jump(
PyThreadState *tstate, int event,
_PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _Py_CODEUNIT *target)
@ -1010,17 +1010,27 @@ _Py_call_instrumentation_jump(
assert(event == PY_MONITORING_EVENT_JUMP ||
event == PY_MONITORING_EVENT_BRANCH);
assert(frame->prev_instr == instr);
/* Event should occur after the jump */
frame->prev_instr = target;
PyCodeObject *code = frame->f_code;
int to = (int)(target - _PyCode_CODE(code));
PyObject *to_obj = PyLong_FromLong(to * (int)sizeof(_Py_CODEUNIT));
if (to_obj == NULL) {
return -1;
return NULL;
}
PyObject *args[4] = { NULL, NULL, NULL, to_obj };
int err = call_instrumentation_vector(tstate, event, frame, instr, 3, args);
Py_DECREF(to_obj);
return err;
if (err) {
return NULL;
}
if (frame->prev_instr != target) {
/* The callback has caused a jump (by setting the line number) */
return frame->prev_instr;
}
/* Reset prev_instr for INSTRUMENTED_LINE */
frame->prev_instr = instr;
return target;
}
static void
@ -1076,13 +1086,14 @@ _Py_Instrumentation_GetLine(PyCodeObject *code, int index)
}
int
_Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr)
_Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr, _Py_CODEUNIT *prev)
{
frame->prev_instr = instr;
PyCodeObject *code = frame->f_code;
assert(is_version_up_to_date(code, tstate->interp));
assert(instrumentation_cross_checks(tstate->interp, code));
int i = (int)(instr - _PyCode_CODE(code));
_PyCoMonitoringData *monitoring = code->_co_monitoring;
_PyCoLineInstrumentationData *line_data = &monitoring->lines[i];
uint8_t original_opcode = line_data->original_opcode;
@ -1092,6 +1103,18 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame,
PyInterpreterState *interp = tstate->interp;
int8_t line_delta = line_data->line_delta;
int line = compute_line(code, i, line_delta);
assert(line >= 0);
int prev_index = (int)(prev - _PyCode_CODE(code));
int prev_line = _Py_Instrumentation_GetLine(code, prev_index);
if (prev_line == line) {
int prev_opcode = _PyCode_CODE(code)[prev_index].op.code;
/* RESUME and INSTRUMENTED_RESUME are needed for the operation of
* instrumentation, so must never be hidden by an INSTRUMENTED_LINE.
*/
if (prev_opcode != RESUME && prev_opcode != INSTRUMENTED_RESUME) {
goto done;
}
}
uint8_t tools = code->_co_monitoring->line_tools != NULL ?
code->_co_monitoring->line_tools[i] :
(interp->monitors.tools[PY_MONITORING_EVENT_LINE] |
@ -1275,30 +1298,92 @@ initialize_lines(PyCodeObject *code)
line_data[i].original_opcode = 0;
break;
default:
/* Set original_opcode to the opcode iff the instruction
* starts a line, and thus should be instrumented.
* This saves having to perform this check every time the
* we turn instrumentation on or off, and serves as a sanity
* check when debugging.
*/
if (line != current_line && line >= 0) {
line_data[i].original_opcode = opcode;
}
else {
line_data[i].original_opcode = 0;
}
if (line >= 0) {
current_line = line;
}
current_line = line;
}
for (int j = 1; j < length; j++) {
line_data[i+j].original_opcode = 0;
line_data[i+j].line_delta = NO_LINE;
}
switch (opcode) {
case RETURN_VALUE:
case RAISE_VARARGS:
case RERAISE:
/* Blocks of code after these terminators
* should be treated as different lines */
current_line = -1;
}
i += length;
}
for (int i = code->_co_firsttraceable; i < code_len; ) {
int opcode = _Py_GetBaseOpcode(code, i);
int oparg = 0;
while (opcode == EXTENDED_ARG) {
oparg = (oparg << 8) | _PyCode_CODE(code)[i].op.arg;
i++;
opcode = _Py_GetBaseOpcode(code, i);
}
oparg = (oparg << 8) | _PyCode_CODE(code)[i].op.arg;
i += instruction_length(code, i);
int target = -1;
switch (opcode) {
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
case JUMP_FORWARD:
{
target = i + oparg;
break;
}
case FOR_ITER:
case SEND:
{
/* Skip over END_FOR/END_SEND */
target = i + oparg + 1;
break;
}
case JUMP_BACKWARD:
case JUMP_BACKWARD_NO_INTERRUPT:
{
target = i - oparg;
break;
}
default:
continue;
}
assert(target >= 0);
if (line_data[target].line_delta != NO_LINE) {
line_data[target].original_opcode = _Py_GetBaseOpcode(code, target);
}
}
/* Scan exception table */
unsigned char *start = (unsigned char *)PyBytes_AS_STRING(code->co_exceptiontable);
unsigned char *end = start + PyBytes_GET_SIZE(code->co_exceptiontable);
unsigned char *scan = start;
while (scan < end) {
int start_offset, size, handler;
scan = parse_varint(scan, &start_offset);
assert(start_offset >= 0 && start_offset < code_len);
scan = parse_varint(scan, &size);
assert(size >= 0 && start_offset+size <= code_len);
scan = parse_varint(scan, &handler);
assert(handler >= 0 && handler < code_len);
int depth_and_lasti;
scan = parse_varint(scan, &depth_and_lasti);
int original_opcode = _Py_GetBaseOpcode(code, handler);
/* Skip if not the start of a line.
* END_ASYNC_FOR is a bit special as it marks the end of
* an `async for` loop, which should not generate its own
* line event. */
if (line_data[handler].line_delta != NO_LINE &&
original_opcode != END_ASYNC_FOR) {
line_data[handler].original_opcode = original_opcode;
}
}
}
static void
@ -2010,7 +2095,7 @@ PyObject *_Py_CreateMonitoringObject(void)
if (mod == NULL) {
return NULL;
}
if (PyObject_SetAttrString(mod, "DISABLE", &DISABLE)) {
if (PyObject_SetAttrString(mod, "DISABLE", &_PyInstrumentation_DISABLE)) {
goto error;
}
if (PyObject_SetAttrString(mod, "MISSING", &_PyInstrumentation_MISSING)) {

View file

@ -4,6 +4,7 @@
#include <stddef.h>
#include "Python.h"
#include "opcode.h"
#include "pycore_ceval.h"
#include "pycore_object.h"
#include "pycore_sysmodule.h"
@ -213,7 +214,6 @@ trace_line(
if (line < 0) {
Py_RETURN_NONE;
}
frame ->f_last_traced_line = line;
Py_INCREF(frame);
frame->f_lineno = line;
int err = tstate->c_tracefunc(tstate->c_traceobj, frame, self->event, Py_None);
@ -245,14 +245,12 @@ sys_trace_line_func(
return NULL;
}
assert(args[0] == (PyObject *)frame->f_frame->f_code);
if (frame ->f_last_traced_line == line) {
/* Already traced this line */
Py_RETURN_NONE;
}
return trace_line(tstate, self, frame, line);
}
/* sys.settrace generates line events for all backward
* edges, even if on the same line.
* Handle that case here */
static PyObject *
sys_trace_jump_func(
_PyLegacyEventHandler *self, PyObject *const *args,
@ -268,61 +266,33 @@ sys_trace_jump_func(
assert(from >= 0);
int to = _PyLong_AsInt(args[2])/sizeof(_Py_CODEUNIT);
assert(to >= 0);
if (to > from) {
/* Forward jump */
return &_PyInstrumentation_DISABLE;
}
PyCodeObject *code = (PyCodeObject *)args[0];
assert(PyCode_Check(code));
/* We can call _Py_Instrumentation_GetLine because we always set
* line events for tracing */
int to_line = _Py_Instrumentation_GetLine(code, to);
int from_line = _Py_Instrumentation_GetLine(code, from);
if (to_line != from_line) {
/* Will be handled by target INSTRUMENTED_LINE */
return &_PyInstrumentation_DISABLE;
}
PyFrameObject *frame = PyEval_GetFrame();
if (frame == NULL) {
PyErr_SetString(PyExc_SystemError,
"Missing frame when calling trace function.");
return NULL;
}
if (!frame->f_trace_lines) {
Py_RETURN_NONE;
}
PyCodeObject *code = (PyCodeObject *)args[0];
assert(PyCode_Check(code));
assert(code == frame->f_frame->f_code);
/* We can call _Py_Instrumentation_GetLine because we always set
* line events for tracing */
int to_line = _Py_Instrumentation_GetLine(code, to);
/* Backward jump: Always generate event
* Forward jump: Only generate event if jumping to different line. */
if (to > from && frame->f_last_traced_line == to_line) {
/* Already traced this line */
if (!frame->f_trace_lines) {
Py_RETURN_NONE;
}
return trace_line(tstate, self, frame, to_line);
}
/* We don't care about the exception here,
* we just treat it as a possible new line
*/
static PyObject *
sys_trace_exception_handled(
_PyLegacyEventHandler *self, PyObject *const *args,
size_t nargsf, PyObject *kwnames
) {
assert(kwnames == NULL);
PyThreadState *tstate = _PyThreadState_GET();
if (tstate->c_tracefunc == NULL) {
Py_RETURN_NONE;
}
assert(PyVectorcall_NARGS(nargsf) == 3);
PyFrameObject *frame = PyEval_GetFrame();
PyCodeObject *code = (PyCodeObject *)args[0];
assert(PyCode_Check(code));
assert(code == frame->f_frame->f_code);
assert(PyLong_Check(args[1]));
int offset = _PyLong_AsInt(args[1])/sizeof(_Py_CODEUNIT);
/* We can call _Py_Instrumentation_GetLine because we always set
* line events for tracing */
int line = _Py_Instrumentation_GetLine(code, offset);
if (frame->f_last_traced_line == line) {
/* Already traced this line */
Py_RETURN_NONE;
}
return trace_line(tstate, self, frame, line);
}
PyTypeObject _PyLegacyEventHandler_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"sys.legacy_event_handler",
@ -487,7 +457,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
}
if (set_callbacks(PY_MONITORING_SYS_TRACE_ID,
(vectorcallfunc)sys_trace_jump_func, PyTrace_LINE,
PY_MONITORING_EVENT_JUMP, PY_MONITORING_EVENT_BRANCH)) {
PY_MONITORING_EVENT_JUMP, -1)) {
return -1;
}
if (set_callbacks(PY_MONITORING_SYS_TRACE_ID,
@ -495,11 +465,6 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg)
PY_MONITORING_EVENT_INSTRUCTION, -1)) {
return -1;
}
if (set_callbacks(PY_MONITORING_SYS_TRACE_ID,
(vectorcallfunc)sys_trace_exception_handled, PyTrace_LINE,
PY_MONITORING_EVENT_EXCEPTION_HANDLED, -1)) {
return -1;
}
}
int delta = (func != NULL) - (tstate->c_tracefunc != NULL);

View file

@ -367,8 +367,6 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 2;
case SWAP:
return (oparg-2) + 2;
case INSTRUMENTED_LINE:
return 0;
case INSTRUMENTED_INSTRUCTION:
return 0;
case INSTRUMENTED_JUMP_FORWARD:
@ -759,8 +757,6 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case SWAP:
return (oparg-2) + 2;
case INSTRUMENTED_LINE:
return 0;
case INSTRUMENTED_INSTRUCTION:
return 0;
case INSTRUMENTED_JUMP_FORWARD:
@ -976,7 +972,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[COPY] = { true, INSTR_FMT_IB },
[BINARY_OP] = { true, INSTR_FMT_IBC },
[SWAP] = { true, INSTR_FMT_IB },
[INSTRUMENTED_LINE] = { true, INSTR_FMT_IX },
[INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX },
[INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB },
[INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IB },