gh-106581: Project through calls (#108067)

This finishes the work begun in gh-107760. If we encounter a call to a short, simple function while projecting a superblock, the superblock will now enter the function using `_PUSH_FRAME`, continue through it, leave it using `_POP_FRAME`, and then continue through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
This commit is contained in:
Guido van Rossum 2023-08-17 11:29:58 -07:00 committed by GitHub
parent 292a22bdc2
commit 61c7249759
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 409 additions and 109 deletions

View file

@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = {
.tp_as_sequence = &uop_as_sequence,
};
// Capacity of the trace_stack array in translate_bytecode_to_trace():
// the number of (code, instr) pairs that can be saved at once.
// TRACE_STACK_PUSH() stops the trace when this limit is reached.
#define TRACE_STACK_SIZE 5
static int
translate_bytecode_to_trace(
PyCodeObject *code,
@ -380,10 +382,16 @@ translate_bytecode_to_trace(
_PyUOpInstruction *trace,
int buffer_size)
{
PyCodeObject *initial_code = code;
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
int reserved = 0;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@ -441,6 +449,24 @@ translate_bytecode_to_trace(
// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME)

// Save the current (code, instr) pair in trace_stack so that a later
// TRACE_STACK_POP() can restore it.  If trace_stack already holds
// TRACE_STACK_SIZE entries, emit a SAVE_IP uop and jump to `done`
// without saving anything.
// The body is wrapped in do { ... } while (0) so that the macro expands
// to a single statement and remains correct under an unbraced if/else.
#define TRACE_STACK_PUSH() \
    do { \
        if (trace_stack_depth >= TRACE_STACK_SIZE) { \
            DPRINTF(2, "Trace stack overflow\n"); \
            ADD_TO_TRACE(SAVE_IP, 0, 0); \
            goto done; \
        } \
        trace_stack[trace_stack_depth].code = code; \
        trace_stack[trace_stack_depth].instr = instr; \
        trace_stack_depth++; \
    } while (0)
// Restore the (code, instr) pair most recently saved by
// TRACE_STACK_PUSH().  Popping from an empty trace stack is reported
// through Py_FatalError().
// The body is wrapped in do { ... } while (0) so that the macro expands
// to a single statement and remains correct under an unbraced if/else.
#define TRACE_STACK_POP() \
    do { \
        if (trace_stack_depth <= 0) { \
            Py_FatalError("Trace stack underflow\n"); \
        } \
        trace_stack_depth--; \
        code = trace_stack[trace_stack_depth].code; \
        instr = trace_stack[trace_stack_depth].instr; \
    } while (0)
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@ -448,6 +474,7 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
top: // Jump here after _PUSH_FRAME
for (;;) {
RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE
ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
@ -508,7 +535,7 @@ pop_jump_if_bool:
case JUMP_BACKWARD:
{
if (instr + 2 - oparg == initial_instr) {
if (instr + 2 - oparg == initial_instr && code == initial_code) {
RESERVE(1, 0);
ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
}
@ -573,6 +600,14 @@ pop_jump_if_bool:
// Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
int nuops = expansion->nuops;
RESERVE(nuops, 0);
if (expansion->uops[nuops-1].uop == _POP_FRAME) {
// Check for trace stack underflow now:
// We can't bail e.g. in the middle of
// LOAD_CONST + _POP_FRAME.
if (trace_stack_depth == 0) {
DPRINTF(2, "Trace stack underflow\n");
goto done;}
}
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
oparg = orig_oparg;
@ -619,8 +654,57 @@ pop_jump_if_bool:
Py_FatalError("garbled expansion");
}
ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
if (expansion->uops[i].uop == _POP_FRAME) {
TRACE_STACK_POP();
DPRINTF(2,
"Returning to %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(instr, code));
goto top;
}
if (expansion->uops[i].uop == _PUSH_FRAME) {
assert(i + 1 == nuops);
int func_version_offset =
offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
// Add one to account for the actual opcode/oparg pair:
+ 1;
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
DPRINTF(3, "Function object: %p\n", func);
if (func != NULL) {
PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
if (new_code == code) {
// Recursive call, bail (we could be here forever).
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
PyUnicode_AsUTF8(new_code->co_qualname),
PyUnicode_AsUTF8(new_code->co_filename),
new_code->co_firstlineno);
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
if (new_code->co_version != func_version) {
// func.__code__ was updated.
// Perhaps it may happen again, so don't bother tracing.
// TODO: Reason about this -- is it better to bail or not?
DPRINTF(2, "Bailing because co_version != func_version\n");
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
// Increment IP to the return address
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
TRACE_STACK_PUSH();
code = new_code;
instr = _PyCode_CODE(code);
DPRINTF(2,
"Continuing in %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(instr, code));
goto top;
}
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
@ -639,6 +723,10 @@ pop_jump_if_bool:
} // End for (;;)
done:
while (trace_stack_depth > 0) {
TRACE_STACK_POP();
}
assert(code == initial_code);
// Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
if (trace_length > 3) {
ADD_TO_TRACE(EXIT_TRACE, 0, 0);