GH-115419: Tidy up tier 2 optimizer. Merge peephole pass into main pass (GH-117997)

This commit is contained in:
Mark Shannon 2024-04-18 11:09:30 +01:00 committed by GitHub
parent f70395786f
commit e32f6e9e4b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 169 additions and 132 deletions

View file

@ -362,6 +362,30 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
}
}
/* Recover the code object referenced by a _PUSH_FRAME/_POP_FRAME uop.
 *
 * The operand of these uops is one of:
 *   - 0: nothing recorded, NULL is returned;
 *   - a value with the low bit set: a PyCodeObject * tagged with that bit;
 *   - any other value: a PyFunctionObject *, whose func_code is returned.
 */
static PyCodeObject *
get_code(_PyUOpInstruction *op)
{
    assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
    const uint64_t raw = op->operand;
    if (raw == 0) {
        /* No function or code object is attached to this uop. */
        return NULL;
    }
    PyCodeObject *code;
    if ((raw & 1) == 0) {
        /* Untagged operand: it is a function object. */
        PyFunctionObject *function = (PyFunctionObject *)raw;
        assert(PyFunction_Check(function));
        code = (PyCodeObject *)function->func_code;
    }
    else {
        /* Tagged operand: clear the low bit to get the code pointer. */
        code = (PyCodeObject *)(raw & ~1);
    }
    assert(PyCode_Check(code));
    return code;
}
/* 1 for success, 0 for not ready, cannot error at the moment. */
static int
optimize_uops(
@ -376,6 +400,10 @@ optimize_uops(
_Py_UOpsContext context;
_Py_UOpsContext *ctx = &context;
uint32_t opcode = UINT16_MAX;
int curr_space = 0;
int max_space = 0;
_PyUOpInstruction *first_valid_check_stack = NULL;
_PyUOpInstruction *corresponding_check_stack = NULL;
if (_Py_uop_abstractcontext_init(ctx) < 0) {
goto out_of_space;
@ -416,8 +444,7 @@ optimize_uops(
ctx->frame->stack_pointer = stack_pointer;
assert(STACK_LEVEL() >= 0);
}
_Py_uop_abstractcontext_fini(ctx);
return trace_len;
Py_UNREACHABLE();
out_of_space:
DPRINTF(3, "\n");
@ -443,9 +470,17 @@ hit_bottom:
_Py_uop_abstractcontext_fini(ctx);
return 0;
done:
/* Cannot optimize further, but there would be no benefit
* in retrying later */
/* Either reached the end or cannot optimize further, but there
* would be no benefit in retrying later */
_Py_uop_abstractcontext_fini(ctx);
if (first_valid_check_stack != NULL) {
assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
assert(max_space > 0);
assert(max_space <= INT_MAX);
assert(max_space <= INT32_MAX);
first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
first_valid_check_stack->operand = max_space;
}
return trace_len;
}
@ -532,124 +567,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
Py_UNREACHABLE();
}
/* Given a _PUSH_FRAME or _POP_FRAME uop, return the code object its
 * operand refers to, or NULL when the operand is 0.
 *
 * A non-zero operand is either a PyCodeObject * with its low bit set as
 * a tag, or an untagged PyFunctionObject * (its func_code is returned).
 */
static PyCodeObject *
get_co(_PyUOpInstruction *op)
{
    assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
    uint64_t bits = op->operand;
    PyCodeObject *result = NULL;
    if (bits != 0) {
        if (bits & 1) {
            /* Low bit is the "this is a code object" tag; mask it off. */
            result = (PyCodeObject *)(bits & ~1);
        }
        else {
            PyFunctionObject *callee = (PyFunctionObject *)bits;
            assert(PyFunction_Check(callee));
            result = (PyCodeObject *)callee->func_code;
        }
        assert(PyCode_Check(result));
    }
    return result;
}
/* Peephole pass over a tier 2 uop buffer.
 *
 * Scans `buffer` from index 0 until a _JUMP_TO_TOP or _EXIT_TRACE (or an
 * early bail-out) and rewrites instructions in place:
 *   - _LOAD_CONST becomes _LOAD_CONST_INLINE_BORROW when the constant is
 *     immortal, otherwise _LOAD_CONST_INLINE, with the constant stored in
 *     the operand;
 *   - _CHECK_PEP_523 becomes _NOP when the interpreter has no eval_frame
 *     function installed;
 *   - the _CHECK_STACK_SPACE preceding each _PUSH_FRAME is merged: the first
 *     one is turned into a _CHECK_STACK_SPACE_OPERAND whose operand is the
 *     largest cumulative frame size seen, and the later ones become _NOP.
 *
 * `frame` supplies the code object used for the outermost _LOAD_CONSTs;
 * `buffer_size` is the number of entries in `buffer`.
 */
static void
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
{
    PyCodeObject *co = _PyFrame_GetCode(frame);
    int curr_space = 0;
    int max_space = 0;
    _PyUOpInstruction *first_valid_check_stack = NULL;
    _PyUOpInstruction *corresponding_check_stack = NULL;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        switch(opcode) {
            case _LOAD_CONST: {
                assert(co != NULL);
                PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg);
                buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
                buffer[pc].operand = (uintptr_t)val;
                break;
            }
            case _CHECK_PEP_523: {
                /* Setting the eval frame function invalidates
                 * all executors, so no need to check dynamically */
                if (_PyInterpreterState_GET()->eval_frame == NULL) {
                    buffer[pc].opcode = _NOP;
                }
                break;
            }
            case _CHECK_STACK_SPACE: {
                // remember it until the matching _PUSH_FRAME is reached
                assert(corresponding_check_stack == NULL);
                corresponding_check_stack = &buffer[pc];
                break;
            }
            case _PUSH_FRAME: {
                assert(corresponding_check_stack != NULL);
                co = get_co(&buffer[pc]);
                if (co == NULL) {
                    // should be about to _EXIT_TRACE anyway
                    goto finish;
                }
                int framesize = co->co_framesize;
                assert(framesize > 0);
                assert(curr_space >= 0);
                // Test *before* adding: signed integer overflow is undefined
                // behavior, so a post-hoc `curr_space < 0` check is unreliable.
                if (framesize > INT32_MAX - curr_space) {
                    // won't fit in signed 32-bit int
                    goto finish;
                }
                curr_space += framesize;
                max_space = curr_space > max_space ? curr_space : max_space;
                if (first_valid_check_stack == NULL) {
                    first_valid_check_stack = corresponding_check_stack;
                }
                else {
                    // delete all but the first valid _CHECK_STACK_SPACE
                    corresponding_check_stack->opcode = _NOP;
                }
                corresponding_check_stack = NULL;
                break;
            }
            case _POP_FRAME: {
                assert(corresponding_check_stack == NULL);
                assert(co != NULL);
                // release the space of the frame being popped, then switch
                // to the code object of the frame being returned to
                int framesize = co->co_framesize;
                assert(framesize > 0);
                assert(framesize <= curr_space);
                curr_space -= framesize;
                co = get_co(&buffer[pc]);
                if (co == NULL) {
                    // might be impossible, but bailing is still safe
                    goto finish;
                }
                break;
            }
            case _JUMP_TO_TOP:
            case _EXIT_TRACE:
                goto finish;
#ifdef Py_DEBUG
            case _CHECK_STACK_SPACE_OPERAND: {
                /* We should never see _CHECK_STACK_SPACE_OPERANDs.
                 * They are only created at the end of this pass. */
                Py_UNREACHABLE();
            }
#endif
            default:
                // every other uop is left untouched
                break;
        }
    }
    // the loop is expected to leave through one of the `goto finish` paths
    Py_UNREACHABLE();
finish:
    if (first_valid_check_stack != NULL) {
        assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
        assert(max_space > 0);
        assert(max_space <= INT32_MAX);
        first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
        first_valid_check_stack->operand = max_space;
    }
}
// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// > 0 - length of optimized trace
@ -669,8 +586,6 @@ _Py_uop_analyze_and_optimize(
return err;
}
peephole_opt(frame, buffer, length);
length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
length, curr_stacklen, dependencies);