This patch by Antoine Pitrou optimizes the bytecode for conditional branches by
merging the following "POP_TOP" instruction into the conditional jump.  For
example, the list comprehension "[x for x in l if not x]" produced the
following bytecode:

  1           0 BUILD_LIST               0
              3 LOAD_FAST                0 (.0)
        >>    6 FOR_ITER                23 (to 32)
              9 STORE_FAST               1 (x)
             12 LOAD_FAST                1 (x)
             15 JUMP_IF_TRUE            10 (to 28)
             18 POP_TOP
             19 LOAD_FAST                1 (x)
             22 LIST_APPEND              2
             25 JUMP_ABSOLUTE            6
        >>   28 POP_TOP
             29 JUMP_ABSOLUTE            6
        >>   32 RETURN_VALUE

but after the patch it produces the following bytecode:

  1           0 BUILD_LIST               0
              3 LOAD_FAST                0 (.0)
        >>    6 FOR_ITER                18 (to 27)
              9 STORE_FAST               1 (x)
             12 LOAD_FAST                1 (x)
             15 POP_JUMP_IF_TRUE         6
             18 LOAD_FAST                1 (x)
             21 LIST_APPEND              2
             24 JUMP_ABSOLUTE            6
        >>   27 RETURN_VALUE

Notice that not only the code is shorter, but the conditional jump
(POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through
the JUMP_ABSOLUTE at the end. "continue" statements are helped
similarly.

Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been
replaced by two new opcodes:
- JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise
- JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise
This commit is contained in:
Jeffrey Yasskin 2009-02-25 02:25:04 +00:00
parent 0a68b01d64
commit 9de7ec7868
9 changed files with 195 additions and 129 deletions

View file

@ -1295,7 +1295,6 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
SETLOCAL(oparg, v);
FAST_DISPATCH();
PREDICTED(POP_TOP);
TARGET(POP_TOP)
v = POP();
Py_DECREF(v);
@ -2204,8 +2203,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
Py_DECREF(w);
SET_TOP(x);
if (x == NULL) break;
PREDICT(JUMP_IF_FALSE);
PREDICT(JUMP_IF_TRUE);
PREDICT(POP_JUMP_IF_FALSE);
PREDICT(POP_JUMP_IF_TRUE);
DISPATCH();
TARGET(IMPORT_NAME)
@ -2282,44 +2281,95 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
JUMPBY(oparg);
FAST_DISPATCH();
PREDICTED_WITH_ARG(JUMP_IF_FALSE);
TARGET(JUMP_IF_FALSE)
w = TOP();
PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE);
TARGET(POP_JUMP_IF_FALSE)
w = POP();
if (w == Py_True) {
PREDICT(POP_TOP);
Py_DECREF(w);
FAST_DISPATCH();
}
if (w == Py_False) {
JUMPBY(oparg);
Py_DECREF(w);
JUMPTO(oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(w);
Py_DECREF(w);
if (err > 0)
err = 0;
else if (err == 0)
JUMPBY(oparg);
JUMPTO(oparg);
else
break;
DISPATCH();
PREDICTED_WITH_ARG(JUMP_IF_TRUE);
TARGET(JUMP_IF_TRUE)
w = TOP();
PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE);
TARGET(POP_JUMP_IF_TRUE)
w = POP();
if (w == Py_False) {
PREDICT(POP_TOP);
Py_DECREF(w);
FAST_DISPATCH();
}
if (w == Py_True) {
JUMPBY(oparg);
Py_DECREF(w);
JUMPTO(oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(w);
Py_DECREF(w);
if (err > 0) {
err = 0;
JUMPTO(oparg);
}
else if (err == 0)
;
else
break;
DISPATCH();
TARGET(JUMP_IF_FALSE_OR_POP)
w = TOP();
if (w == Py_True) {
STACKADJ(-1);
Py_DECREF(w);
FAST_DISPATCH();
}
if (w == Py_False) {
JUMPTO(oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(w);
if (err > 0) {
STACKADJ(-1);
Py_DECREF(w);
err = 0;
}
else if (err == 0)
JUMPTO(oparg);
else
break;
DISPATCH();
TARGET(JUMP_IF_TRUE_OR_POP)
w = TOP();
if (w == Py_False) {
STACKADJ(-1);
Py_DECREF(w);
FAST_DISPATCH();
}
if (w == Py_True) {
JUMPTO(oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(w);
if (err > 0) {
err = 0;
JUMPBY(oparg);
JUMPTO(oparg);
}
else if (err == 0) {
STACKADJ(-1);
Py_DECREF(w);
}
else if (err == 0)
;
else
break;
DISPATCH();

View file

@ -818,11 +818,15 @@ opcode_stack_effect(int opcode, int oparg)
return 1;
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_TRUE_OR_POP: /* -1 if jump not taken */
case JUMP_IF_FALSE_OR_POP: /* "" */
case JUMP_ABSOLUTE:
return 0;
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
return -1;
case LOAD_GLOBAL:
return 1;
@ -1664,12 +1668,10 @@ compiler_ifexp(struct compiler *c, expr_ty e)
if (next == NULL)
return 0;
VISIT(c, expr, e->v.IfExp.test);
ADDOP_JREL(c, JUMP_IF_FALSE, next);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, next);
VISIT(c, expr, e->v.IfExp.body);
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, next);
ADDOP(c, POP_TOP);
VISIT(c, expr, e->v.IfExp.orelse);
compiler_use_next_block(c, end);
return 1;
@ -1732,9 +1734,6 @@ compiler_if(struct compiler *c, stmt_ty s)
end = compiler_new_block(c);
if (end == NULL)
return 0;
next = compiler_new_block(c);
if (next == NULL)
return 0;
constant = expr_constant(s->v.If.test);
/* constant = 0: "if 0"
@ -1746,15 +1745,21 @@ compiler_if(struct compiler *c, stmt_ty s)
} else if (constant == 1) {
VISIT_SEQ(c, stmt, s->v.If.body);
} else {
if (s->v.If.orelse) {
next = compiler_new_block(c);
if (next == NULL)
return 0;
}
else
next = end;
VISIT(c, expr, s->v.If.test);
ADDOP_JREL(c, JUMP_IF_FALSE, next);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, next);
VISIT_SEQ(c, stmt, s->v.If.body);
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, next);
ADDOP(c, POP_TOP);
if (s->v.If.orelse)
if (s->v.If.orelse) {
compiler_use_next_block(c, next);
VISIT_SEQ(c, stmt, s->v.If.orelse);
}
}
compiler_use_next_block(c, end);
return 1;
@ -1828,8 +1833,7 @@ compiler_while(struct compiler *c, stmt_ty s)
so we need to set an extra line number. */
c->u->u_lineno_set = 0;
VISIT(c, expr, s->v.While.test);
ADDOP_JREL(c, JUMP_IF_FALSE, anchor);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor);
}
VISIT_SEQ(c, stmt, s->v.While.body);
ADDOP_JABS(c, JUMP_ABSOLUTE, loop);
@ -1840,7 +1844,6 @@ compiler_while(struct compiler *c, stmt_ty s)
if (constant == -1) {
compiler_use_next_block(c, anchor);
ADDOP(c, POP_TOP);
ADDOP(c, POP_BLOCK);
}
compiler_pop_fblock(c, LOOP, loop);
@ -1947,7 +1950,7 @@ compiler_try_finally(struct compiler *c, stmt_ty s)
}
/*
Code generated for "try: S except E1, V1: S1 except E2, V2: S2 ...":
Code generated for "try: S except E1 as V1: S1 except E2 as V2: S2 ...":
(The contents of the value stack is shown in [], with the top
at the right; 'tb' is trace-back info, 'val' the exception's
associated value, and 'exc' the exception.)
@ -1961,20 +1964,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s)
[tb, val, exc] L1: DUP )
[tb, val, exc, exc] <evaluate E1> )
[tb, val, exc, exc, E1] COMPARE_OP EXC_MATCH ) only if E1
[tb, val, exc, 1-or-0] JUMP_IF_FALSE L2 )
[tb, val, exc, 1] POP )
[tb, val, exc, 1-or-0] POP_JUMP_IF_FALSE L2 )
[tb, val, exc] POP
[tb, val] <assign to V1> (or POP if no V1)
[tb] POP
[] <code for S1>
JUMP_FORWARD L0
[tb, val, exc, 0] L2: POP
[tb, val, exc] DUP
[tb, val, exc] L2: DUP
.............................etc.......................
[tb, val, exc, 0] Ln+1: POP
[tb, val, exc] END_FINALLY # re-raise exception
[tb, val, exc] Ln+1: END_FINALLY # re-raise exception
[] L0: <next statement>
@ -2016,8 +2016,7 @@ compiler_try_except(struct compiler *c, stmt_ty s)
ADDOP(c, DUP_TOP);
VISIT(c, expr, handler->v.ExceptHandler.type);
ADDOP_I(c, COMPARE_OP, PyCmp_EXC_MATCH);
ADDOP_JREL(c, JUMP_IF_FALSE, except);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, except);
}
ADDOP(c, POP_TOP);
if (handler->v.ExceptHandler.name) {
@ -2088,8 +2087,6 @@ compiler_try_except(struct compiler *c, stmt_ty s)
}
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, except);
if (handler->v.ExceptHandler.type)
ADDOP(c, POP_TOP);
}
ADDOP(c, END_FINALLY);
compiler_use_next_block(c, orelse);
@ -2268,8 +2265,7 @@ compiler_assert(struct compiler *c, stmt_ty s)
end = compiler_new_block(c);
if (end == NULL)
return 0;
ADDOP_JREL(c, JUMP_IF_TRUE, end);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_TRUE, end);
ADDOP_O(c, LOAD_GLOBAL, assertion_error, names);
if (s->v.Assert.msg) {
VISIT(c, expr, s->v.Assert.msg);
@ -2277,7 +2273,6 @@ compiler_assert(struct compiler *c, stmt_ty s)
}
ADDOP_I(c, RAISE_VARARGS, 1);
compiler_use_next_block(c, end);
ADDOP(c, POP_TOP);
return 1;
}
@ -2629,9 +2624,9 @@ compiler_boolop(struct compiler *c, expr_ty e)
assert(e->kind == BoolOp_kind);
if (e->v.BoolOp.op == And)
jumpi = JUMP_IF_FALSE;
jumpi = JUMP_IF_FALSE_OR_POP;
else
jumpi = JUMP_IF_TRUE;
jumpi = JUMP_IF_TRUE_OR_POP;
end = compiler_new_block(c);
if (end == NULL)
return 0;
@ -2640,8 +2635,7 @@ compiler_boolop(struct compiler *c, expr_ty e)
assert(n >= 0);
for (i = 0; i < n; ++i) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
ADDOP_JREL(c, jumpi, end);
ADDOP(c, POP_TOP)
ADDOP_JABS(c, jumpi, end);
}
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n));
compiler_use_next_block(c, end);
@ -2737,9 +2731,8 @@ compiler_compare(struct compiler *c, expr_ty e)
ADDOP_I(c, COMPARE_OP,
cmpop((cmpop_ty)(asdl_seq_GET(
e->v.Compare.ops, i - 1))));
ADDOP_JREL(c, JUMP_IF_FALSE, cleanup);
ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup);
NEXT_BLOCK(c);
ADDOP(c, POP_TOP);
if (i < (n - 1))
VISIT(c, expr,
(expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
@ -2872,10 +2865,9 @@ compiler_comprehension_generator(struct compiler *c,
for (i = 0; i < n; i++) {
expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i);
VISIT(c, expr, e);
ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup);
NEXT_BLOCK(c);
ADDOP(c, POP_TOP);
}
}
if (++gen_index < asdl_seq_LEN(generators))
if (!compiler_comprehension_generator(c,
@ -2913,13 +2905,7 @@ compiler_comprehension_generator(struct compiler *c,
compiler_use_next_block(c, skip);
}
for (i = 0; i < n; i++) {
ADDOP_I(c, JUMP_FORWARD, 1);
if (i == 0)
compiler_use_next_block(c, if_cleanup);
ADDOP(c, POP_TOP);
}
compiler_use_next_block(c, if_cleanup);
ADDOP_JABS(c, JUMP_ABSOLUTE, start);
compiler_use_next_block(c, anchor);

View file

@ -87,8 +87,10 @@ typedef unsigned short mode_t;
Python 3.0a5: 3130 (lexical exception stacking, including POP_EXCEPT)
Python 3.1a0: 3140 (optimize list, set and dict comprehensions:
change LIST_APPEND and SET_ADD, add MAP_ADD)
Python 3.1a0: 3150 (optimize conditional branches:
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
*/
#define MAGIC (3140 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define MAGIC (3150 | ((long)'\r'<<16) | ((long)'\n'<<24))
/* Magic word as global; note that _PyImport_Init() can change the
value of this global to accommodate for alterations of how the

View file

@ -110,11 +110,11 @@ static void *opcode_targets[256] = {
&&TARGET_IMPORT_NAME,
&&TARGET_IMPORT_FROM,
&&TARGET_JUMP_FORWARD,
&&TARGET_JUMP_IF_FALSE,
&&TARGET_JUMP_IF_TRUE,
&&TARGET_JUMP_IF_FALSE_OR_POP,
&&TARGET_JUMP_IF_TRUE_OR_POP,
&&TARGET_JUMP_ABSOLUTE,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_POP_JUMP_IF_FALSE,
&&TARGET_POP_JUMP_IF_TRUE,
&&TARGET_LOAD_GLOBAL,
&&_unknown_opcode,
&&_unknown_opcode,

View file

@ -13,7 +13,12 @@
#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
|| op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
@ -237,8 +242,10 @@ markblocks(unsigned char *code, Py_ssize_t len)
switch (opcode) {
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
case SETUP_LOOP:
@ -363,29 +370,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
assert(PyList_Check(consts));
for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
reoptimize_current:
opcode = codestr[i];
lastlc = cumlc;
cumlc = 0;
switch (opcode) {
/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with
with JUMP_IF_TRUE POP_TOP */
/* Replace UNARY_NOT POP_JUMP_IF_FALSE
with POP_JUMP_IF_TRUE */
case UNARY_NOT:
if (codestr[i+1] != JUMP_IF_FALSE ||
codestr[i+4] != POP_TOP ||
!ISBASICBLOCK(blocks,i,5))
if (codestr[i+1] != POP_JUMP_IF_FALSE
|| !ISBASICBLOCK(blocks,i,4))
continue;
tgt = GETJUMPTGT(codestr, (i+1));
if (codestr[tgt] != POP_TOP)
continue;
j = GETARG(codestr, i+1) + 1;
codestr[i] = JUMP_IF_TRUE;
j = GETARG(codestr, i+1);
codestr[i] = POP_JUMP_IF_TRUE;
SETARG(codestr, i, j);
codestr[i+3] = POP_TOP;
codestr[i+4] = NOP;
break;
codestr[i+3] = NOP;
goto reoptimize_current;
/* not a is b --> a is not b
not a in b --> a not in b
@ -417,29 +419,19 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
break;
/* Skip over LOAD_CONST trueconst
JUMP_IF_FALSE xx POP_TOP */
POP_JUMP_IF_FALSE xx. This improves
"while 1" performance. */
case LOAD_CONST:
cumlc = lastlc + 1;
j = GETARG(codestr, i);
if (codestr[i+3] != JUMP_IF_FALSE ||
codestr[i+6] != POP_TOP ||
if (codestr[i+3] != POP_JUMP_IF_FALSE ||
!ISBASICBLOCK(blocks,i,7) ||
!PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
continue;
memset(codestr+i, NOP, 7);
memset(codestr+i, NOP, 6);
cumlc = 0;
break;
/* Replace POP_TOP JUMP_FORWARD 1 POP_TOP
with NOP NOP NOP NOP POP_TOP. */
case POP_TOP:
if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
GETJUMPTGT(codestr, i+1) == i+5 &&
codestr[i+4] == POP_TOP &&
ISBASICBLOCK(blocks,i,4))
memset(codestr+i, NOP, 4);
break;
/* Try to fold tuples of constants (includes a case for lists
which are only used for "in" and "not in" tests).
Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
@ -524,27 +516,47 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
"if a or b:"
"a and b or c"
"(a and b) and c"
x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z
x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3
x:POP_OR_JUMP y y:POP_OR_JUMP z --> x:POP_OR_JUMP z
x:POP_OR_JUMP y y:JUMP_OR_POP z --> x:POP_JUMP_IF_FALSE y+3
where y+3 is the instruction following the second test.
*/
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
tgt = GETJUMPTGT(codestr, i);
j = codestr[tgt];
if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
if (j == opcode) {
tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
if (CONDITIONAL_JUMP(j)) {
/* NOTE: all possible jumps here are
absolute! */
if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
/* The second jump will be
taken iff the first is. */
tgttgt = GETJUMPTGT(codestr, tgt);
/* The current opcode inherits
its target's stack behaviour */
codestr[i] = j;
SETARG(codestr, i, tgttgt);
goto reoptimize_current;
} else {
tgt -= i;
SETARG(codestr, i, tgt);
/* The second jump is not taken
if the first is (so jump past
it), and all conditional
jumps pop their argument when
they're not taken (so change
the first jump to pop its
argument when it's taken). */
if (JUMPS_ON_TRUE(opcode))
codestr[i] = POP_JUMP_IF_TRUE;
else
codestr[i] = POP_JUMP_IF_FALSE;
SETARG(codestr, i, (tgt + 3));
goto reoptimize_current;
}
break;
}
/* Intentional fallthrough */
/* Intentional fallthrough */
/* Replace jumps to unconditional jumps */
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_ABSOLUTE:
@ -621,14 +633,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
j = addrmap[GETARG(codestr, i)];
SETARG(codestr, i, j);
break;
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case SETUP_LOOP:
case SETUP_EXCEPT:
case SETUP_FINALLY: