bpo-42246: Eliminate jumps to exit blocks by copying those blocks. (#23251)

* Compiler: eliminate jumps to short exit blocks by copying.
This commit is contained in:
Mark Shannon 2020-11-12 19:49:33 +00:00 committed by GitHub
parent 750c5abf43
commit cc75ab791d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 4662 additions and 4664 deletions

View file

@ -96,6 +96,10 @@ typedef struct basicblock_ {
/* b_return is true if a RETURN_VALUE opcode is inserted. */
unsigned b_return : 1;
unsigned b_reachable : 1;
/* Basic block has no fall through (it ends with a return, raise or jump) */
unsigned b_nofallthrough : 1;
/* Basic block exits scope (it ends with a return or raise) */
unsigned b_exit : 1;
/* depth of stack upon entry of block, computed by stackdepth() */
int b_startdepth;
/* instruction offset for block, computed by assemble_jump_offsets() */
@ -5434,28 +5438,14 @@ struct assembler {
PyObject *a_bytecode; /* string containing bytecode */
int a_offset; /* offset into bytecode */
int a_nblocks; /* number of reachable blocks */
basicblock **a_reverse_postorder; /* list of blocks in dfs postorder */
PyObject *a_lnotab; /* string containing lnotab */
int a_lnotab_off; /* offset into lnotab */
int a_prevlineno; /* lineno of last emitted line in line table */
int a_lineno; /* lineno of last emitted instruction */
int a_lineno_start; /* bytecode start offset of current lineno */
basicblock *a_entry;
};
static void
dfs(struct compiler *c, basicblock *b, struct assembler *a, int end)
{
/* There is no real depth-first-search to do here because all the
* blocks are emitted in topological order already, so we just need to
* follow the b_next pointers and place them in a->a_reverse_postorder in
* reverse order and make sure that the first one starts at 0. */
for (a->a_nblocks = 0; b != NULL; b = b->b_next) {
a->a_reverse_postorder[a->a_nblocks++] = b;
}
}
Py_LOCAL_INLINE(void)
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
{
@ -5553,12 +5543,7 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno)
PyErr_NoMemory();
return 0;
}
a->a_reverse_postorder = (basicblock **)PyObject_Malloc(
sizeof(basicblock *) * nblocks);
if (!a->a_reverse_postorder) {
PyErr_NoMemory();
return 0;
}
return 1;
}
@ -5567,8 +5552,6 @@ assemble_free(struct assembler *a)
{
Py_XDECREF(a->a_bytecode);
Py_XDECREF(a->a_lnotab);
if (a->a_reverse_postorder)
PyObject_Free(a->a_reverse_postorder);
}
static int
@ -5697,8 +5680,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
Replace block pointer with position in bytecode. */
do {
totsize = 0;
for (i = 0; i < a->a_nblocks; i++) {
b = a->a_reverse_postorder[i];
for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
bsize = blocksize(b);
b->b_offset = totsize;
totsize += bsize;
@ -5966,7 +5948,7 @@ assemble(struct compiler *c, int addNone)
{
basicblock *b, *entryblock;
struct assembler a;
int i, j, nblocks;
int j, nblocks;
PyCodeObject *co = NULL;
PyObject *consts = NULL;
@ -5997,7 +5979,8 @@ assemble(struct compiler *c, int addNone)
}
if (!assemble_init(&a, nblocks, c->u->u_firstlineno))
goto error;
dfs(c, entryblock, &a, nblocks);
a.a_entry = entryblock;
a.a_nblocks = nblocks;
consts = consts_dict_keys_inorder(c->u->u_consts);
if (consts == NULL) {
@ -6010,9 +5993,8 @@ assemble(struct compiler *c, int addNone)
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(&a, c);
/* Emit code in reverse postorder from dfs. */
for (i = 0; i < a.a_nblocks; i++) {
b = a.a_reverse_postorder[i];
/* Emit code. */
for(b = entryblock; b != NULL; b = b->b_next) {
for (j = 0; j < b->b_iused; j++)
if (!assemble_emit(&a, &b->b_instr[j]))
goto error;
@ -6097,6 +6079,8 @@ fold_tuple_on_constants(struct instr *inst,
return 0;
}
/* Maximum size of basic block that should be copied in optimizer */
#define MAX_COPY_SIZE 4
/* Optimization */
static int
@ -6238,19 +6222,29 @@ optimize_basic_block(basicblock *bb, PyObject *consts)
case JUMP_ABSOLUTE:
case JUMP_FORWARD:
assert (i == bb->b_iused-1);
switch(target->i_opcode) {
case JUMP_FORWARD:
inst->i_target = target->i_target;
break;
case JUMP_ABSOLUTE:
case RETURN_VALUE:
case RERAISE:
case RAISE_VARARGS:
lineno = inst->i_lineno;
*inst = *target;
inst->i_lineno = lineno;
break;
}
if (inst->i_target->b_exit && inst->i_target->b_iused <= MAX_COPY_SIZE) {
basicblock *to_copy = inst->i_target;
*inst = to_copy->b_instr[0];
for (i = 1; i < to_copy->b_iused; i++) {
int index = compiler_next_instr(bb);
if (index < 0) {
return -1;
}
bb->b_instr[index] = to_copy->b_instr[i];
}
bb->b_exit = 1;
}
break;
}
}
@ -6262,52 +6256,63 @@ error:
static void
clean_basic_block(basicblock *bb) {
/* Remove NOPs and any code following a return or re-raise. */
/* Remove NOPs. */
int dest = 0;
int prev_lineno = -1;
for (int src = 0; src < bb->b_iused; src++) {
int lineno = bb->b_instr[src].i_lineno;
switch(bb->b_instr[src].i_opcode) {
case RETURN_VALUE:
case RERAISE:
bb->b_next = NULL;
bb->b_instr[dest] = bb->b_instr[src];
dest++;
goto end;
case NOP:
{
/* Eliminate no-op if it doesn't have a line number, or
* if the next instruction has same line number or no line number, or
* if the previous instruction had the same line number. */
if (lineno < 0) {
break;
}
if (prev_lineno == lineno) {
break;
}
if (src < bb->b_iused - 1) {
int next_lineno = bb->b_instr[src+1].i_lineno;
if (next_lineno < 0 || next_lineno == lineno) {
bb->b_instr[src+1].i_lineno = lineno;
break;
}
if (bb->b_instr[src].i_opcode == NOP) {
/* Eliminate no-op if it doesn't have a line number, or
* if the next instruction has same line number or no line number, or
* if the previous instruction had the same line number. */
if (lineno < 0) {
continue;
}
if (prev_lineno == lineno) {
continue;
}
if (src < bb->b_iused - 1) {
int next_lineno = bb->b_instr[src+1].i_lineno;
if (next_lineno < 0 || next_lineno == lineno) {
bb->b_instr[src+1].i_lineno = lineno;
continue;
}
}
/* fallthrough */
default:
if (dest != src) {
bb->b_instr[dest] = bb->b_instr[src];
}
dest++;
prev_lineno = lineno;
break;
}
if (dest != src) {
bb->b_instr[dest] = bb->b_instr[src];
}
dest++;
prev_lineno = lineno;
}
end:
assert(dest <= bb->b_iused);
bb->b_iused = dest;
}
static void
normalise_basic_block(basicblock *bb) {
/* Remove any code following a return or re-raise,
and mark those blocks as exit and/or nofallthrough. */
for (int i = 0; i < bb->b_iused; i++) {
switch(bb->b_instr[i].i_opcode) {
case RETURN_VALUE:
case RAISE_VARARGS:
case RERAISE:
bb->b_iused = i+1;
bb->b_exit = 1;
bb->b_nofallthrough = 1;
return;
case JUMP_ABSOLUTE:
case JUMP_FORWARD:
bb->b_iused = i+1;
bb->b_nofallthrough = 1;
return;
}
}
}
static int
mark_reachable(struct assembler *a) {
basicblock **stack, **sp;
@ -6315,12 +6320,11 @@ mark_reachable(struct assembler *a) {
if (stack == NULL) {
return -1;
}
basicblock *entry = a->a_reverse_postorder[0];
entry->b_reachable = 1;
*sp++ = entry;
a->a_entry->b_reachable = 1;
*sp++ = a->a_entry;
while (sp > stack) {
basicblock *b = *(--sp);
if (b->b_next && b->b_next->b_reachable == 0) {
if (b->b_next && !b->b_nofallthrough && b->b_next->b_reachable == 0) {
b->b_next->b_reachable = 1;
*sp++ = b->b_next;
}
@ -6352,20 +6356,23 @@ mark_reachable(struct assembler *a) {
static int
optimize_cfg(struct assembler *a, PyObject *consts)
{
for (int i = 0; i < a->a_nblocks; i++) {
if (optimize_basic_block(a->a_reverse_postorder[i], consts)) {
for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
normalise_basic_block(b);
}
for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
if (optimize_basic_block(b, consts)) {
return -1;
}
clean_basic_block(a->a_reverse_postorder[i]);
assert(a->a_reverse_postorder[i]->b_reachable == 0);
clean_basic_block(b);
assert(b->b_reachable == 0);
}
if (mark_reachable(a)) {
return -1;
}
/* Delete unreachable instructions */
for (int i = 0; i < a->a_nblocks; i++) {
if (a->a_reverse_postorder[i]->b_reachable == 0) {
a->a_reverse_postorder[i]->b_iused = 0;
for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
if (b->b_reachable == 0) {
b->b_iused = 0;
}
}
return 0;