Backport r69961 to trunk, replacing JUMP_IF_{TRUE,FALSE} with

POP_JUMP_IF_{TRUE,FALSE} and JUMP_IF_{TRUE,FALSE}_OR_POP. This avoids executing
a POP_TOP on each conditional and sometimes allows the peephole optimizer to
skip a JUMP_ABSOLUTE entirely. It speeds up list comprehensions significantly.
This commit is contained in:
Jeffrey Yasskin 2009-02-28 19:03:21 +00:00
parent de28d6841e
commit 68d6852006
11 changed files with 228 additions and 178 deletions

View file

@ -764,8 +764,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
/* OpCode prediction macros
Some opcodes tend to come in pairs thus making it possible to
predict the second code when the first is run. For example,
COMPARE_OP is often followed by JUMP_IF_FALSE or JUMP_IF_TRUE. And,
those opcodes are often followed by a POP_TOP.
GET_ITER is often followed by FOR_ITER. And FOR_ITER is often
followed by STORE_FAST or UNPACK_SEQUENCE.
Verifying the prediction costs a single high-speed test of a register
variable against a constant. If the pairing was good, then the
@ -1110,7 +1110,6 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
SETLOCAL(oparg, v);
goto fast_next_opcode;
PREDICTED(POP_TOP);
case POP_TOP:
v = POP();
Py_DECREF(v);
@ -2220,8 +2219,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
Py_DECREF(w);
SET_TOP(x);
if (x == NULL) break;
PREDICT(JUMP_IF_FALSE);
PREDICT(JUMP_IF_TRUE);
PREDICT(POP_JUMP_IF_FALSE);
PREDICT(POP_JUMP_IF_TRUE);
continue;
case IMPORT_NAME:
@ -2298,44 +2297,95 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
JUMPBY(oparg);
goto fast_next_opcode;
PREDICTED_WITH_ARG(JUMP_IF_FALSE);
case JUMP_IF_FALSE:
w = TOP();
PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE);
case POP_JUMP_IF_FALSE:
w = POP();
if (w == Py_True) {
PREDICT(POP_TOP);
Py_DECREF(w);
goto fast_next_opcode;
}
if (w == Py_False) {
JUMPBY(oparg);
Py_DECREF(w);
JUMPTO(oparg);
goto fast_next_opcode;
}
err = PyObject_IsTrue(w);
Py_DECREF(w);
if (err > 0)
err = 0;
else if (err == 0)
JUMPBY(oparg);
JUMPTO(oparg);
else
break;
continue;
PREDICTED_WITH_ARG(JUMP_IF_TRUE);
case JUMP_IF_TRUE:
w = TOP();
PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE);
case POP_JUMP_IF_TRUE:
w = POP();
if (w == Py_False) {
PREDICT(POP_TOP);
Py_DECREF(w);
goto fast_next_opcode;
}
if (w == Py_True) {
JUMPBY(oparg);
Py_DECREF(w);
JUMPTO(oparg);
goto fast_next_opcode;
}
err = PyObject_IsTrue(w);
Py_DECREF(w);
if (err > 0) {
err = 0;
JUMPTO(oparg);
}
else if (err == 0)
;
else
break;
continue;
case JUMP_IF_FALSE_OR_POP:
w = TOP();
if (w == Py_True) {
STACKADJ(-1);
Py_DECREF(w);
goto fast_next_opcode;
}
if (w == Py_False) {
JUMPTO(oparg);
goto fast_next_opcode;
}
err = PyObject_IsTrue(w);
if (err > 0) {
STACKADJ(-1);
Py_DECREF(w);
err = 0;
}
else if (err == 0)
JUMPTO(oparg);
else
break;
continue;
case JUMP_IF_TRUE_OR_POP:
w = TOP();
if (w == Py_False) {
STACKADJ(-1);
Py_DECREF(w);
goto fast_next_opcode;
}
if (w == Py_True) {
JUMPTO(oparg);
goto fast_next_opcode;
}
err = PyObject_IsTrue(w);
if (err > 0) {
err = 0;
JUMPBY(oparg);
JUMPTO(oparg);
}
else if (err == 0) {
STACKADJ(-1);
Py_DECREF(w);
}
else if (err == 0)
;
else
break;
continue;

View file

@ -836,11 +836,15 @@ opcode_stack_effect(int opcode, int oparg)
return 1;
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_TRUE_OR_POP: /* -1 if jump not taken */
case JUMP_IF_FALSE_OR_POP: /* "" */
case JUMP_ABSOLUTE:
return 0;
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
return -1;
case LOAD_GLOBAL:
return 1;
@ -1499,12 +1503,10 @@ compiler_ifexp(struct compiler *c, expr_ty e)
if (next == NULL)
return 0;
VISIT(c, expr, e->v.IfExp.test);
ADDOP_JREL(c, JUMP_IF_FALSE, next);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, next);
VISIT(c, expr, e->v.IfExp.body);
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, next);
ADDOP(c, POP_TOP);
VISIT(c, expr, e->v.IfExp.orelse);
compiler_use_next_block(c, end);
return 1;
@ -1597,9 +1599,6 @@ compiler_if(struct compiler *c, stmt_ty s)
end = compiler_new_block(c);
if (end == NULL)
return 0;
next = compiler_new_block(c);
if (next == NULL)
return 0;
constant = expr_constant(s->v.If.test);
/* constant = 0: "if 0"
@ -1611,15 +1610,21 @@ compiler_if(struct compiler *c, stmt_ty s)
} else if (constant == 1) {
VISIT_SEQ(c, stmt, s->v.If.body);
} else {
if (s->v.If.orelse) {
next = compiler_new_block(c);
if (next == NULL)
return 0;
}
else
next = end;
VISIT(c, expr, s->v.If.test);
ADDOP_JREL(c, JUMP_IF_FALSE, next);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, next);
VISIT_SEQ(c, stmt, s->v.If.body);
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, next);
ADDOP(c, POP_TOP);
if (s->v.If.orelse)
if (s->v.If.orelse) {
compiler_use_next_block(c, next);
VISIT_SEQ(c, stmt, s->v.If.orelse);
}
}
compiler_use_next_block(c, end);
return 1;
@ -1693,8 +1698,7 @@ compiler_while(struct compiler *c, stmt_ty s)
so we need to set an extra line number. */
c->u->u_lineno_set = false;
VISIT(c, expr, s->v.While.test);
ADDOP_JREL(c, JUMP_IF_FALSE, anchor);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor);
}
VISIT_SEQ(c, stmt, s->v.While.body);
ADDOP_JABS(c, JUMP_ABSOLUTE, loop);
@ -1705,7 +1709,6 @@ compiler_while(struct compiler *c, stmt_ty s)
if (constant == -1) {
compiler_use_next_block(c, anchor);
ADDOP(c, POP_TOP);
ADDOP(c, POP_BLOCK);
}
compiler_pop_fblock(c, LOOP, loop);
@ -1826,20 +1829,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s)
[tb, val, exc] L1: DUP )
[tb, val, exc, exc] <evaluate E1> )
[tb, val, exc, exc, E1] COMPARE_OP EXC_MATCH ) only if E1
[tb, val, exc, 1-or-0] JUMP_IF_FALSE L2 )
[tb, val, exc, 1] POP )
[tb, val, exc, 1-or-0] POP_JUMP_IF_FALSE L2 )
[tb, val, exc] POP
[tb, val] <assign to V1> (or POP if no V1)
[tb] POP
[] <code for S1>
JUMP_FORWARD L0
[tb, val, exc, 0] L2: POP
[tb, val, exc] DUP
[tb, val, exc] L2: DUP
.............................etc.......................
[tb, val, exc, 0] Ln+1: POP
[tb, val, exc] END_FINALLY # re-raise exception
[tb, val, exc] Ln+1: END_FINALLY # re-raise exception
[] L0: <next statement>
@ -1881,8 +1881,7 @@ compiler_try_except(struct compiler *c, stmt_ty s)
ADDOP(c, DUP_TOP);
VISIT(c, expr, handler->v.ExceptHandler.type);
ADDOP_I(c, COMPARE_OP, PyCmp_EXC_MATCH);
ADDOP_JREL(c, JUMP_IF_FALSE, except);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, except);
}
ADDOP(c, POP_TOP);
if (handler->v.ExceptHandler.name) {
@ -1895,8 +1894,6 @@ compiler_try_except(struct compiler *c, stmt_ty s)
VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
ADDOP_JREL(c, JUMP_FORWARD, end);
compiler_use_next_block(c, except);
if (handler->v.ExceptHandler.type)
ADDOP(c, POP_TOP);
}
ADDOP(c, END_FINALLY);
compiler_use_next_block(c, orelse);
@ -2084,8 +2081,7 @@ compiler_assert(struct compiler *c, stmt_ty s)
end = compiler_new_block(c);
if (end == NULL)
return 0;
ADDOP_JREL(c, JUMP_IF_TRUE, end);
ADDOP(c, POP_TOP);
ADDOP_JABS(c, POP_JUMP_IF_TRUE, end);
ADDOP_O(c, LOAD_GLOBAL, assertion_error, names);
if (s->v.Assert.msg) {
VISIT(c, expr, s->v.Assert.msg);
@ -2095,7 +2091,6 @@ compiler_assert(struct compiler *c, stmt_ty s)
ADDOP_I(c, RAISE_VARARGS, 1);
}
compiler_use_next_block(c, end);
ADDOP(c, POP_TOP);
return 1;
}
@ -2473,9 +2468,9 @@ compiler_boolop(struct compiler *c, expr_ty e)
assert(e->kind == BoolOp_kind);
if (e->v.BoolOp.op == And)
jumpi = JUMP_IF_FALSE;
jumpi = JUMP_IF_FALSE_OR_POP;
else
jumpi = JUMP_IF_TRUE;
jumpi = JUMP_IF_TRUE_OR_POP;
end = compiler_new_block(c);
if (end == NULL)
return 0;
@ -2484,8 +2479,7 @@ compiler_boolop(struct compiler *c, expr_ty e)
assert(n >= 0);
for (i = 0; i < n; ++i) {
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
ADDOP_JREL(c, jumpi, end);
ADDOP(c, POP_TOP)
ADDOP_JABS(c, jumpi, end);
}
VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n));
compiler_use_next_block(c, end);
@ -2543,9 +2537,8 @@ compiler_compare(struct compiler *c, expr_ty e)
ADDOP_I(c, COMPARE_OP,
cmpop((cmpop_ty)(asdl_seq_GET(
e->v.Compare.ops, i - 1))));
ADDOP_JREL(c, JUMP_IF_FALSE, cleanup);
ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup);
NEXT_BLOCK(c);
ADDOP(c, POP_TOP);
if (i < (n - 1))
VISIT(c, expr,
(expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
@ -2636,9 +2629,8 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators,
for (i = 0; i < n; i++) {
expr_ty e = (expr_ty)asdl_seq_GET(l->ifs, i);
VISIT(c, expr, e);
ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup);
NEXT_BLOCK(c);
ADDOP(c, POP_TOP);
}
if (++gen_index < asdl_seq_LEN(generators))
@ -2652,12 +2644,7 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators,
compiler_use_next_block(c, skip);
}
for (i = 0; i < n; i++) {
ADDOP_I(c, JUMP_FORWARD, 1);
if (i == 0)
compiler_use_next_block(c, if_cleanup);
ADDOP(c, POP_TOP);
}
compiler_use_next_block(c, if_cleanup);
ADDOP_JABS(c, JUMP_ABSOLUTE, start);
compiler_use_next_block(c, anchor);
@ -2720,10 +2707,9 @@ compiler_genexp_generator(struct compiler *c,
for (i = 0; i < n; i++) {
expr_ty e = (expr_ty)asdl_seq_GET(ge->ifs, i);
VISIT(c, expr, e);
ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup);
ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup);
NEXT_BLOCK(c);
ADDOP(c, POP_TOP);
}
}
if (++gen_index < asdl_seq_LEN(generators))
if (!compiler_genexp_generator(c, generators, gen_index, elt))
@ -2737,13 +2723,7 @@ compiler_genexp_generator(struct compiler *c,
compiler_use_next_block(c, skip);
}
for (i = 0; i < n; i++) {
ADDOP_I(c, JUMP_FORWARD, 1);
if (i == 0)
compiler_use_next_block(c, if_cleanup);
ADDOP(c, POP_TOP);
}
compiler_use_next_block(c, if_cleanup);
ADDOP_JABS(c, JUMP_ABSOLUTE, start);
compiler_use_next_block(c, anchor);
ADDOP(c, POP_BLOCK);

View file

@ -72,9 +72,11 @@ typedef unsigned short mode_t;
Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
Python 2.6a1: 62161 (WITH_CLEANUP optimization)
Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
Python 2.7a0: 62181 (optimize conditional branches:
introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
.
*/
#define MAGIC (62171 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define MAGIC (62181 | ((long)'\r'<<16) | ((long)'\n'<<24))
/* Magic word as global; note that _PyImport_Init() can change the
value of this global to accommodate for alterations of how the

View file

@ -13,7 +13,12 @@
#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
|| op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
@ -245,8 +250,10 @@ markblocks(unsigned char *code, Py_ssize_t len)
switch (opcode) {
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
case SETUP_LOOP:
@ -338,29 +345,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
assert(PyList_Check(consts));
for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
reoptimize_current:
opcode = codestr[i];
lastlc = cumlc;
cumlc = 0;
switch (opcode) {
/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with
with JUMP_IF_TRUE POP_TOP */
/* Replace UNARY_NOT POP_JUMP_IF_FALSE
with POP_JUMP_IF_TRUE */
case UNARY_NOT:
if (codestr[i+1] != JUMP_IF_FALSE ||
codestr[i+4] != POP_TOP ||
!ISBASICBLOCK(blocks,i,5))
if (codestr[i+1] != POP_JUMP_IF_FALSE
|| !ISBASICBLOCK(blocks,i,4))
continue;
tgt = GETJUMPTGT(codestr, (i+1));
if (codestr[tgt] != POP_TOP)
continue;
j = GETARG(codestr, i+1) + 1;
codestr[i] = JUMP_IF_TRUE;
j = GETARG(codestr, i+1);
codestr[i] = POP_JUMP_IF_TRUE;
SETARG(codestr, i, j);
codestr[i+3] = POP_TOP;
codestr[i+4] = NOP;
break;
codestr[i+3] = NOP;
goto reoptimize_current;
/* not a is b --> a is not b
not a in b --> a not in b
@ -400,16 +402,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
break;
/* Skip over LOAD_CONST trueconst
JUMP_IF_FALSE xx POP_TOP */
POP_JUMP_IF_FALSE xx. This improves
"while 1" performance. */
case LOAD_CONST:
cumlc = lastlc + 1;
j = GETARG(codestr, i);
if (codestr[i+3] != JUMP_IF_FALSE ||
codestr[i+6] != POP_TOP ||
!ISBASICBLOCK(blocks,i,7) ||
if (codestr[i+3] != POP_JUMP_IF_FALSE ||
!ISBASICBLOCK(blocks,i,6) ||
!PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
continue;
memset(codestr+i, NOP, 7);
memset(codestr+i, NOP, 6);
cumlc = 0;
break;
@ -498,27 +500,49 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
"if a or b:"
"a and b or c"
"(a and b) and c"
x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z
x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3
x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z
--> x:JUMP_IF_FALSE_OR_POP z
x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z
--> x:POP_JUMP_IF_FALSE y+3
where y+3 is the instruction following the second test.
*/
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
tgt = GETJUMPTGT(codestr, i);
j = codestr[tgt];
if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
if (j == opcode) {
tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
if (CONDITIONAL_JUMP(j)) {
/* NOTE: all possible jumps here are
absolute! */
if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
/* The second jump will be
taken iff the first is. */
tgttgt = GETJUMPTGT(codestr, tgt);
/* The current opcode inherits
its target's stack behaviour */
codestr[i] = j;
SETARG(codestr, i, tgttgt);
goto reoptimize_current;
} else {
tgt -= i;
SETARG(codestr, i, tgt);
/* The second jump is not taken
if the first is (so jump past
it), and all conditional
jumps pop their argument when
they're not taken (so change
the first jump to pop its
argument when it's taken). */
if (JUMPS_ON_TRUE(opcode))
codestr[i] = POP_JUMP_IF_TRUE;
else
codestr[i] = POP_JUMP_IF_FALSE;
SETARG(codestr, i, (tgt + 3));
goto reoptimize_current;
}
break;
}
/* Intentional fallthrough */
/* Intentional fallthrough */
/* Replace jumps to unconditional jumps */
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_ABSOLUTE:
@ -591,14 +615,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case JUMP_IF_FALSE_OR_POP:
case JUMP_IF_TRUE_OR_POP:
j = addrmap[GETARG(codestr, i)];
SETARG(codestr, i, j);
break;
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_IF_FALSE:
case JUMP_IF_TRUE:
case SETUP_LOOP:
case SETUP_EXCEPT:
case SETUP_FINALLY: