mirror of
https://github.com/python/cpython.git
synced 2025-07-12 13:55:34 +00:00
bpo-41323: Perform 'peephole' optimizations directly on the CFG. (GH-21517)
* Move 'peephole' optimizations into compile.c and perform them directly on the CFG.
This commit is contained in:
parent
ba18c0b13b
commit
6e8128f02e
12 changed files with 4365 additions and 4584 deletions
374
Python/compile.c
374
Python/compile.c
|
@ -9,7 +9,7 @@
|
|||
* 3. Generate code for basic blocks. See compiler_mod() in this file.
|
||||
* 4. Assemble the basic blocks into final code. See assemble() in
|
||||
* this file.
|
||||
* 5. Optimize the byte code (peephole optimizations). See peephole.c
|
||||
* 5. Optimize the byte code (peephole optimizations).
|
||||
*
|
||||
* Note that compiler_mod() suggests module, but the module ast type
|
||||
* (mod_ty) has cases for expressions and interactive statements.
|
||||
|
@ -69,6 +69,7 @@ typedef struct basicblock_ {
|
|||
struct basicblock_ *b_next;
|
||||
/* b_return is true if a RETURN_VALUE opcode is inserted. */
|
||||
unsigned b_return : 1;
|
||||
unsigned b_reachable : 1;
|
||||
/* depth of stack upon entry of block, computed by stackdepth() */
|
||||
int b_startdepth;
|
||||
/* instruction offset for block, computed by assemble_jump_offsets() */
|
||||
|
@ -499,7 +500,7 @@ compiler_unit_check(struct compiler_unit *u)
|
|||
assert((uintptr_t)block != 0xdbdbdbdbU);
|
||||
if (block->b_instr != NULL) {
|
||||
assert(block->b_ialloc > 0);
|
||||
assert(block->b_iused > 0);
|
||||
assert(block->b_iused >= 0);
|
||||
assert(block->b_ialloc >= block->b_iused);
|
||||
}
|
||||
else {
|
||||
|
@ -3645,6 +3646,11 @@ compiler_boolop(struct compiler *c, expr_ty e)
|
|||
for (i = 0; i < n; ++i) {
|
||||
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
|
||||
ADDOP_JABS(c, jumpi, end);
|
||||
basicblock *next = compiler_new_block(c);
|
||||
if (next == NULL) {
|
||||
return 0;
|
||||
}
|
||||
compiler_use_next_block(c, next);
|
||||
}
|
||||
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n));
|
||||
compiler_use_next_block(c, end);
|
||||
|
@ -5861,28 +5867,24 @@ merge_const_tuple(struct compiler *c, PyObject **tuple)
|
|||
}
|
||||
|
||||
static PyCodeObject *
|
||||
makecode(struct compiler *c, struct assembler *a)
|
||||
makecode(struct compiler *c, struct assembler *a, PyObject *consts)
|
||||
{
|
||||
PyObject *tmp;
|
||||
PyCodeObject *co = NULL;
|
||||
PyObject *consts = NULL;
|
||||
PyObject *names = NULL;
|
||||
PyObject *varnames = NULL;
|
||||
PyObject *name = NULL;
|
||||
PyObject *freevars = NULL;
|
||||
PyObject *cellvars = NULL;
|
||||
PyObject *bytecode = NULL;
|
||||
Py_ssize_t nlocals;
|
||||
int nlocals_int;
|
||||
int flags;
|
||||
int posorkeywordargcount, posonlyargcount, kwonlyargcount, maxdepth;
|
||||
|
||||
consts = consts_dict_keys_inorder(c->u->u_consts);
|
||||
names = dict_keys_inorder(c->u->u_names, 0);
|
||||
varnames = dict_keys_inorder(c->u->u_varnames, 0);
|
||||
if (!consts || !names || !varnames)
|
||||
if (!names || !varnames) {
|
||||
goto error;
|
||||
|
||||
}
|
||||
cellvars = dict_keys_inorder(c->u->u_cellvars, 0);
|
||||
if (!cellvars)
|
||||
goto error;
|
||||
|
@ -5906,16 +5908,12 @@ makecode(struct compiler *c, struct assembler *a)
|
|||
if (flags < 0)
|
||||
goto error;
|
||||
|
||||
bytecode = PyCode_Optimize(a->a_bytecode, consts, names, a->a_lnotab);
|
||||
if (!bytecode)
|
||||
consts = PyList_AsTuple(consts); /* PyCode_New requires a tuple */
|
||||
if (consts == NULL) {
|
||||
goto error;
|
||||
|
||||
tmp = PyList_AsTuple(consts); /* PyCode_New requires a tuple */
|
||||
if (!tmp)
|
||||
goto error;
|
||||
Py_DECREF(consts);
|
||||
consts = tmp;
|
||||
}
|
||||
if (!merge_const_tuple(c, &consts)) {
|
||||
Py_DECREF(consts);
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
@ -5924,21 +5922,21 @@ makecode(struct compiler *c, struct assembler *a)
|
|||
kwonlyargcount = Py_SAFE_DOWNCAST(c->u->u_kwonlyargcount, Py_ssize_t, int);
|
||||
maxdepth = stackdepth(c);
|
||||
if (maxdepth < 0) {
|
||||
Py_DECREF(consts);
|
||||
goto error;
|
||||
}
|
||||
co = PyCode_NewWithPosOnlyArgs(posonlyargcount+posorkeywordargcount,
|
||||
posonlyargcount, kwonlyargcount, nlocals_int,
|
||||
maxdepth, flags, bytecode, consts, names,
|
||||
maxdepth, flags, a->a_bytecode, consts, names,
|
||||
varnames, freevars, cellvars, c->c_filename,
|
||||
c->u->u_name, c->u->u_firstlineno, a->a_lnotab);
|
||||
Py_DECREF(consts);
|
||||
error:
|
||||
Py_XDECREF(consts);
|
||||
Py_XDECREF(names);
|
||||
Py_XDECREF(varnames);
|
||||
Py_XDECREF(name);
|
||||
Py_XDECREF(freevars);
|
||||
Py_XDECREF(cellvars);
|
||||
Py_XDECREF(bytecode);
|
||||
return co;
|
||||
}
|
||||
|
||||
|
@ -5976,6 +5974,9 @@ dump_basicblock(const basicblock *b)
|
|||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
optimize_cfg(struct assembler *a, PyObject *consts);
|
||||
|
||||
static PyCodeObject *
|
||||
assemble(struct compiler *c, int addNone)
|
||||
{
|
||||
|
@ -5983,6 +5984,7 @@ assemble(struct compiler *c, int addNone)
|
|||
struct assembler a;
|
||||
int i, j, nblocks;
|
||||
PyCodeObject *co = NULL;
|
||||
PyObject *consts = NULL;
|
||||
|
||||
/* Make sure every block that falls off the end returns None.
|
||||
XXX NEXT_BLOCK() isn't quite right, because if the last
|
||||
|
@ -6013,6 +6015,14 @@ assemble(struct compiler *c, int addNone)
|
|||
goto error;
|
||||
dfs(c, entryblock, &a, nblocks);
|
||||
|
||||
consts = consts_dict_keys_inorder(c->u->u_consts);
|
||||
if (consts == NULL) {
|
||||
goto error;
|
||||
}
|
||||
if (optimize_cfg(&a, consts)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Can't modify the bytecode after computing jump offsets. */
|
||||
assemble_jump_offsets(&a, c);
|
||||
|
||||
|
@ -6029,8 +6039,9 @@ assemble(struct compiler *c, int addNone)
|
|||
if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0)
|
||||
goto error;
|
||||
|
||||
co = makecode(c, &a);
|
||||
co = makecode(c, &a, consts);
|
||||
error:
|
||||
Py_XDECREF(consts);
|
||||
assemble_free(&a);
|
||||
return co;
|
||||
}
|
||||
|
@ -6042,3 +6053,324 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags,
|
|||
{
|
||||
return PyAST_CompileEx(mod, filename, flags, -1, arena);
|
||||
}
|
||||
|
||||
|
||||
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.

   Returns 0 on success (including the no-op case where not every
   preceding instruction is a LOAD_CONST) and -1 on error with an
   exception set.
*/
static int
fold_tuple_on_constants(struct instr *inst,
                        int n, PyObject *consts)
{
    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(inst[n].i_opcode == BUILD_TUPLE);
    assert(inst[n].i_oparg == n);

    /* Bail out (successfully, without rewriting) unless all n operands
       are constant loads. */
    for (int i = 0; i < n; i++) {
        if (inst[i].i_opcode != LOAD_CONST) {
            return 0;
        }
    }

    /* Buildup new tuple of constants */
    PyObject *newconst = PyTuple_New(n);
    if (newconst == NULL) {
        return -1;
    }
    for (int i = 0; i < n; i++) {
        int arg = inst[i].i_oparg;
        PyObject *constant = PyList_GET_ITEM(consts, arg);
        /* PyTuple_SET_ITEM steals a reference, so take one first. */
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }
    /* The folded tuple is appended at the end of the consts list;
       its index becomes the oparg of the rewritten LOAD_CONST. */
    Py_ssize_t index = PyList_GET_SIZE(consts);
#if SIZEOF_SIZE_T > SIZEOF_INT
    /* Oparg values wider than int cannot be encoded (even with
       EXTENDED_ARG); refuse rather than truncate. */
    if ((size_t)index >= UINT_MAX - 1) {
        Py_DECREF(newconst);
        PyErr_SetString(PyExc_OverflowError, "too many constants");
        return -1;
    }
#endif
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return -1;
    }
    /* The consts list now owns its own reference; drop ours. */
    Py_DECREF(newconst);
    /* Blank out the n LOAD_CONSTs; the NOPs are removed later by
       clean_basic_block(). */
    for (int i = 0; i < n; i++) {
        inst[i].i_opcode = NOP;
    }
    /* Turn the BUILD_TUPLE into a single LOAD_CONST of the new tuple. */
    inst[n].i_opcode = LOAD_CONST;
    inst[n].i_oparg = index;
    return 0;
}
|
||||
|
||||
|
||||
/* Optimization */

/* Apply peephole rewrites to the instructions of one basic block.
   `consts` must still be a list (so fold_tuple_on_constants() can
   append to it).  Rewrites only shrink or keep the code size: removed
   instructions become NOPs, which clean_basic_block() strips later.
   Returns 0 on success, -1 on error with an exception set. */
static int
optimize_basic_block(basicblock *bb, PyObject *consts)
{
    assert(PyList_CheckExact(consts));
    /* Sentinel target used for non-jump instructions, so the switch
       below can test target->i_opcode unconditionally. */
    struct instr nop;
    nop.i_opcode = NOP;
    struct instr *target;
    int lineno;
    for (int i = 0; i < bb->b_iused; i++) {
        struct instr *inst = &bb->b_instr[i];
        int oparg = inst->i_oparg;
        /* 0 (not a real opcode here) when this is the last instruction. */
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
        if (inst->i_jabs || inst->i_jrel) {
            /* Skip over empty basic blocks. */
            while (inst->i_target->b_iused == 0) {
                inst->i_target = inst->i_target->b_next;
            }
            /* First instruction of the (non-empty) jump target. */
            target = &inst->i_target->b_instr[0];
        }
        else {
            target = &nop;
        }
        switch (inst->i_opcode) {
            /* Skip over LOAD_CONST trueconst
               POP_JUMP_IF_FALSE xx. This improves
               "while 1" performance. */
            case LOAD_CONST:
                if (nextop != POP_JUMP_IF_FALSE) {
                    break;
                }
                PyObject* cnt = PyList_GET_ITEM(consts, oparg);
                int is_true = PyObject_IsTrue(cnt);
                if (is_true == -1) {
                    /* PyObject_IsTrue may run arbitrary __bool__ code
                       and fail. */
                    goto error;
                }
                if (is_true == 1) {
                    /* The branch can never be taken: drop both the load
                       and the jump.  Clear i_jabs so the dead jump is
                       not treated as an edge afterwards. */
                    inst->i_opcode = NOP;
                    bb->b_instr[i+1].i_opcode = NOP;
                    bb->b_instr[i+1].i_jabs = 0;
                }
                break;

            /* Try to fold tuples of constants.
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
            case BUILD_TUPLE:
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
                    switch(oparg) {
                        case 1:
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                            break;
                        case 2:
                            inst->i_opcode = ROT_TWO;
                            bb->b_instr[i+1].i_opcode = NOP;
                            break;
                        case 3:
                            inst->i_opcode = ROT_THREE;
                            bb->b_instr[i+1].i_opcode = ROT_TWO;
                    }
                    break;
                }
                /* i >= oparg guarantees the oparg preceding
                   instructions are inside this block. */
                if (i >= oparg) {
                    if (fold_tuple_on_constants(inst-oparg, oparg, consts)) {
                        goto error;
                    }
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               Arises in code like:
               "a and b or c"
               "(a and b) and c"
               "(a or b) or c"
               "(a or b) and c"
               x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z
                  --> x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z
                  --> x:POP_JUMP_IF_FALSE y+1
               where y+1 is the instruction following the second test.
            */
            case JUMP_IF_FALSE_OR_POP:
                switch(target->i_opcode) {
                    case POP_JUMP_IF_FALSE:
                        /* Same truth test, same pop behavior on the
                           taken path: copy the target instruction. */
                        *inst = *target;
                        break;
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                    case JUMP_IF_FALSE_OR_POP:
                        /* Jump-thread: retarget past the intermediate
                           jump. */
                        inst->i_target = target->i_target;
                        break;
                    case JUMP_IF_TRUE_OR_POP:
                        /* Opposite test must fail; fall through to the
                           instruction after it (the target block's
                           successor). */
                        assert (inst->i_target->b_iused == 1);
                        inst->i_opcode = POP_JUMP_IF_FALSE;
                        inst->i_target = inst->i_target->b_next;
                        break;
                }
                break;

            case JUMP_IF_TRUE_OR_POP:
                /* Mirror image of the JUMP_IF_FALSE_OR_POP case above. */
                switch(target->i_opcode) {
                    case POP_JUMP_IF_TRUE:
                        *inst = *target;
                        break;
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                    case JUMP_IF_TRUE_OR_POP:
                        inst->i_target = target->i_target;
                        break;
                    case JUMP_IF_FALSE_OR_POP:
                        assert (inst->i_target->b_iused == 1);
                        inst->i_opcode = POP_JUMP_IF_TRUE;
                        inst->i_target = inst->i_target->b_next;
                        break;
                }
                break;

            case POP_JUMP_IF_FALSE:
                /* Thread a conditional jump through an unconditional one. */
                switch(target->i_opcode) {
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                        inst->i_target = target->i_target;
                        break;
                }
                break;

            case POP_JUMP_IF_TRUE:
                switch(target->i_opcode) {
                    case JUMP_ABSOLUTE:
                    case JUMP_FORWARD:
                        inst->i_target = target->i_target;
                        break;
                }
                break;

            case JUMP_ABSOLUTE:
            case JUMP_FORWARD:
                switch(target->i_opcode) {
                    case JUMP_FORWARD:
                        inst->i_target = target->i_target;
                        break;
                    case JUMP_ABSOLUTE:
                    case RETURN_VALUE:
                    case RERAISE:
                    case RAISE_VARARGS:
                        /* Replace the jump with a copy of its target
                           instruction, but keep the jump's own line
                           number for tracebacks/tracing. */
                        lineno = inst->i_lineno;
                        *inst = *target;
                        inst->i_lineno = lineno;
                        break;
                }
                break;
        }
    }
    return 0;
error:
    return -1;
}
|
||||
|
||||
|
||||
static void
|
||||
clean_basic_block(basicblock *bb) {
|
||||
/* Remove NOPs and any code following a return or re-raise. */
|
||||
int dest = 0;
|
||||
for (int src = 0; src < bb->b_iused; src++) {
|
||||
switch(bb->b_instr[src].i_opcode) {
|
||||
case NOP:
|
||||
/* skip */
|
||||
break;
|
||||
case RETURN_VALUE:
|
||||
case RERAISE:
|
||||
bb->b_next = NULL;
|
||||
bb->b_instr[dest] = bb->b_instr[src];
|
||||
dest++;
|
||||
goto end;
|
||||
default:
|
||||
if (dest != src) {
|
||||
bb->b_instr[dest] = bb->b_instr[src];
|
||||
}
|
||||
dest++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
end:
|
||||
assert(dest <= bb->b_iused);
|
||||
bb->b_iused = dest;
|
||||
}
|
||||
|
||||
static int
|
||||
mark_reachable(struct assembler *a) {
|
||||
basicblock **stack, **sp;
|
||||
sp = stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * a->a_nblocks);
|
||||
if (stack == NULL) {
|
||||
return -1;
|
||||
}
|
||||
basicblock *entry = a->a_reverse_postorder[0];
|
||||
entry->b_reachable = 1;
|
||||
*sp++ = entry;
|
||||
while (sp > stack) {
|
||||
basicblock *b = *(--sp);
|
||||
if (b->b_next && b->b_next->b_reachable == 0) {
|
||||
b->b_next->b_reachable = 1;
|
||||
*sp++ = b->b_next;
|
||||
}
|
||||
for (int i = 0; i < b->b_iused; i++) {
|
||||
basicblock *target;
|
||||
if (b->b_instr[i].i_jrel || b->b_instr[i].i_jabs) {
|
||||
target = b->b_instr[i].i_target;
|
||||
if (target->b_reachable == 0) {
|
||||
target->b_reachable = 1;
|
||||
*sp++ = target;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
PyObject_Free(stack);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Perform basic peephole optimizations on a control flow graph.
|
||||
The consts object should still be in list form to allow new constants
|
||||
to be appended.
|
||||
|
||||
All transformations keep the code size the same or smaller.
|
||||
For those that reduce size, the gaps are initially filled with
|
||||
NOPs. Later those NOPs are removed.
|
||||
*/
|
||||
|
||||
static int
|
||||
optimize_cfg(struct assembler *a, PyObject *consts)
|
||||
{
|
||||
for (int i = 0; i < a->a_nblocks; i++) {
|
||||
if (optimize_basic_block(a->a_reverse_postorder[i], consts)) {
|
||||
return -1;
|
||||
}
|
||||
clean_basic_block(a->a_reverse_postorder[i]);
|
||||
assert(a->a_reverse_postorder[i]->b_reachable == 0);
|
||||
}
|
||||
if (mark_reachable(a)) {
|
||||
return -1;
|
||||
}
|
||||
/* Delete unreachable instructions */
|
||||
for (int i = 0; i < a->a_nblocks; i++) {
|
||||
if (a->a_reverse_postorder[i]->b_reachable == 0) {
|
||||
a->a_reverse_postorder[i]->b_iused = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Retained for API compatibility.
 * Optimization is now done in optimize_cfg */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* Py_UNUSED(consts),
                PyObject *Py_UNUSED(names), PyObject *Py_UNUSED(lnotab_obj))
{
    /* No-op stub: return the bytecode unchanged, with a new reference
       for the caller (the historical API returned a new object). */
    Py_INCREF(code);
    return code;
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue