Issue #11244: The peephole optimizer is now able to constant-fold
arbitrarily complex expressions.  This also fixes a 3.2 regression where
operations involving negative numbers were not constant-folded.
Author: Antoine Pitrou
Date:   2011-03-11 17:27:02 +01:00
parent 336c729563
commit 17b880a5d6
3 changed files with 153 additions and 49 deletions
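As a quick illustration of the behaviour described above (not part of the commit itself), an expression mixing a binary operation with a negative literal should now disassemble to a single LOAD_CONST; the "<example>" file name below is arbitrary:

    import dis

    # With the fixed optimizer the whole expression is folded to the single
    # constant -15; in 3.2 the negative literal prevented the fold.
    dis.dis(compile("3 * -5", "<example>", "eval"))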

Lib/test/test_peepholer.py

@@ -8,8 +8,10 @@ def disassemble(func):
     f = StringIO()
     tmp = sys.stdout
     sys.stdout = f
-    dis.dis(func)
-    sys.stdout = tmp
+    try:
+        dis.dis(func)
+    finally:
+        sys.stdout = tmp
     result = f.getvalue()
     f.close()
     return result
@@ -99,6 +101,12 @@ class TestTranforms(unittest.TestCase):
             self.assertIn(elem, asm)
             self.assertNotIn('BUILD_TUPLE', asm)

+        # Long tuples should be folded too.
+        asm = dis_single(repr(tuple(range(10000))))
+        # One LOAD_CONST for the tuple, one for the None return value
+        self.assertEqual(asm.count('LOAD_CONST'), 2)
+        self.assertNotIn('BUILD_TUPLE', asm)
+
         # Bug 1053819:  Tuple of constants misidentified when presented with:
         #     . . . opcode_with_arg 100   unary_opcode   BUILD_TUPLE 1  . . .
         #     The following would segfault upon compilation
@@ -267,6 +275,25 @@ class TestTranforms(unittest.TestCase):
         asm = disassemble(f)
         self.assertNotIn('BINARY_ADD', asm)

+    def test_constant_folding(self):
+        # Issue #11244: aggressive constant folding.
+        exprs = [
+            "3 * -5",
+            "-3 * 5",
+            "2 * (3 * 4)",
+            "(2 * 3) * 4",
+            "(-1, 2, 3)",
+            "(1, -2, 3)",
+            "(1, 2, -3)",
+            "(1, 2, -3) * 6",
+            "lambda x: x in {(3 * -5) + (-1 - 6), (1, -2, 3) * 2, None}",
+        ]
+        for e in exprs:
+            asm = dis_single(e)
+            self.assertNotIn('UNARY_', asm, e)
+            self.assertNotIn('BINARY_', asm, e)
+            self.assertNotIn('BUILD_', asm, e)
+

 def test_main(verbose=None):
     import sys
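As an illustration of what the new test asserts (not part of the commit), the folded result for one of those expressions can be seen directly in the constants of a compiled code object; the "<example>" file name is arbitrary:

    # With aggressive folding, "(1, 2, -3) * 6" is reduced to one pre-built
    # tuple constant; no BUILD_TUPLE or BINARY_MULTIPLY remains in the code.
    code = compile("x = (1, 2, -3) * 6", "<example>", "exec")
    print(code.co_consts)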

Misc/NEWS

@@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
 Core and Builtins
 -----------------

+- Issue #11244: The peephole optimizer is now able to constant-fold
+  arbitrarily complex expressions.  This also fixes a 3.2 regression where
+  operations involving negative numbers were not constant-folded.
+
 - Issue #11450: Don't truncate hg version info in Py_GetBuildInfo() when
   there are many tags (e.g. when using mq).  Patch by Nadeem Vawda.

Python/peephole.c

@@ -23,6 +23,64 @@
 #define ISBASICBLOCK(blocks, start, bytes) \
     (blocks[start]==blocks[start+bytes-1])

+
+#define CONST_STACK_CREATE() { \
+    const_stack_size = 256; \
+    const_stack = PyMem_New(PyObject *, const_stack_size); \
+    load_const_stack = PyMem_New(Py_ssize_t, const_stack_size); \
+    if (!const_stack || !load_const_stack) { \
+        PyErr_NoMemory(); \
+        goto exitError; \
+        } \
+    }
+
+#define CONST_STACK_DELETE() do { \
+    if (const_stack) \
+        PyMem_Free(const_stack); \
+    if (load_const_stack) \
+        PyMem_Free(load_const_stack); \
+    } while(0)
+
+#define CONST_STACK_LEN() (const_stack_top + 1)
+
+#define CONST_STACK_PUSH_OP(i) do { \
+    PyObject *_x; \
+    assert(codestr[i] == LOAD_CONST); \
+    assert(PyList_GET_SIZE(consts) > GETARG(codestr, i)); \
+    _x = PyList_GET_ITEM(consts, GETARG(codestr, i)); \
+    if (++const_stack_top >= const_stack_size) { \
+        const_stack_size *= 2; \
+        PyMem_Resize(const_stack, PyObject *, const_stack_size); \
+        PyMem_Resize(load_const_stack, Py_ssize_t, const_stack_size); \
+        if (!const_stack || !load_const_stack) { \
+            PyErr_NoMemory(); \
+            goto exitError; \
+            } \
+        } \
+    load_const_stack[const_stack_top] = i; \
+    const_stack[const_stack_top] = _x; \
+    in_consts = 1; \
+    } while(0)
+
+#define CONST_STACK_RESET() do { \
+    const_stack_top = -1; \
+    } while(0)
+
+#define CONST_STACK_TOP(x) \
+    const_stack[const_stack_top]
+
+#define CONST_STACK_LASTN(i) \
+    &const_stack[const_stack_top - i + 1]
+
+#define CONST_STACK_POP(i) do { \
+    assert(const_stack_top + 1 >= i); \
+    const_stack_top -= i; \
+    } while(0)
+
+#define CONST_STACK_OP_LASTN(i) \
+    ((const_stack_top >= i - 1) ? load_const_stack[const_stack_top - i + 1] : -1)
+
+
 /* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
    with    LOAD_CONST (c1, c2, ... cn).
    The consts table must still be in list form so that the
@@ -33,17 +91,14 @@
    test; for BUILD_SET it assembles a frozenset rather than a tuple.
 */
 static int
-tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
+tuple_of_constants(unsigned char *codestr, Py_ssize_t n,
+                   PyObject *consts, PyObject **objs)
 {
     PyObject *newconst, *constant;
-    Py_ssize_t i, arg, len_consts;
+    Py_ssize_t i, len_consts;

     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
-    assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST || codestr[n*3] == BUILD_SET);
-    assert(GETARG(codestr, (n*3)) == n);
-    for (i=0 ; i<n ; i++)
-        assert(codestr[i*3] == LOAD_CONST);

     /* Buildup new tuple of constants */
     newconst = PyTuple_New(n);
@@ -51,16 +106,14 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
         return 0;
     len_consts = PyList_GET_SIZE(consts);
     for (i=0 ; i<n ; i++) {
-        arg = GETARG(codestr, (i*3));
-        assert(arg < len_consts);
-        constant = PyList_GET_ITEM(consts, arg);
+        constant = objs[i];
         Py_INCREF(constant);
         PyTuple_SET_ITEM(newconst, i, constant);
     }

     /* If it's a BUILD_SET, use the PyTuple we just built to create a
        PyFrozenSet, and use that as the constant instead: */
-    if (codestr[n*3] == BUILD_SET) {
+    if (codestr[0] == BUILD_SET) {
         PyObject *tuple = newconst;
         newconst = PyFrozenSet_New(tuple);
         Py_DECREF(tuple);
@@ -77,9 +130,8 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
     /* Write NOPs over old LOAD_CONSTS and
        add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
-    memset(codestr, NOP, n*3);
-    codestr[n*3] = LOAD_CONST;
-    SETARG(codestr, (n*3), len_consts);
+    codestr[0] = LOAD_CONST;
+    SETARG(codestr, 0, len_consts);
     return 1;
 }
@@ -87,14 +139,14 @@ tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
    with    LOAD_CONST binop(c1,c2)
    The consts table must still be in list form so that the
    new constant can be appended.
-   Called with codestr pointing to the first LOAD_CONST.
+   Called with codestr pointing to the BINOP.
    Abandons the transformation if the folding fails (i.e.  1+'a').
    If the new constant is a sequence, only folds when the size
    is below a threshold value.  That keeps pyc files from
    becoming large in the presence of code like:  (None,)*1000.
 */
 static int
-fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
+fold_binops_on_constants(unsigned char *codestr, PyObject *consts, PyObject **objs)
 {
     PyObject *newconst, *v, *w;
     Py_ssize_t len_consts, size;
@@ -102,13 +154,11 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
     /* Pre-conditions */
     assert(PyList_CheckExact(consts));
-    assert(codestr[0] == LOAD_CONST);
-    assert(codestr[3] == LOAD_CONST);

     /* Create new constant */
-    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
-    w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
-    opcode = codestr[6];
+    v = objs[0];
+    w = objs[1];
+    opcode = codestr[0];
     switch (opcode) {
     case BINARY_POWER:
         newconst = PyNumber_Power(v, w, Py_None);
@@ -180,16 +230,15 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
     Py_DECREF(newconst);

     /* Write NOP NOP NOP NOP LOAD_CONST newconst */
-    memset(codestr, NOP, 4);
-    codestr[4] = LOAD_CONST;
-    SETARG(codestr, 4, len_consts);
+    codestr[-2] = LOAD_CONST;
+    SETARG(codestr, -2, len_consts);
     return 1;
 }

 static int
-fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
+fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts, PyObject *v)
 {
-    PyObject *newconst=NULL, *v;
+    PyObject *newconst=NULL/*, *v*/;
     Py_ssize_t len_consts;
     int opcode;
@@ -198,7 +247,6 @@ fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
     assert(codestr[0] == LOAD_CONST);

     /* Create new constant */
-    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
     opcode = codestr[3];
     switch (opcode) {
     case UNARY_NEGATIVE:
@@ -340,7 +388,11 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
     unsigned char *lineno;
     int *addrmap = NULL;
     int new_line, cum_orig_line, last_line, tabsiz;
-    int cumlc=0, lastlc=0;      /* Count runs of consecutive LOAD_CONSTs */
+    PyObject **const_stack = NULL;
+    Py_ssize_t *load_const_stack = NULL;
+    Py_ssize_t const_stack_top = -1;
+    Py_ssize_t const_stack_size = 0;
+    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
     unsigned int *blocks = NULL;
     char *name;
@@ -386,12 +438,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
         goto exitError;
     assert(PyList_Check(consts));

+    CONST_STACK_CREATE();
+
     for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
       reoptimize_current:
         opcode = codestr[i];

-        lastlc = cumlc;
-        cumlc = 0;
+        if (!in_consts) {
+            CONST_STACK_RESET();
+        }
+        in_consts = 0;

         switch (opcode) {
             /* Replace UNARY_NOT POP_JUMP_IF_FALSE
@@ -432,21 +488,21 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                     goto exitError;
                 else if (h == 0)
                     continue;
-                cumlc = lastlc + 1;
+                CONST_STACK_PUSH_OP(i);
                 break;

                 /* Skip over LOAD_CONST trueconst
                    POP_JUMP_IF_FALSE xx.  This improves
                    "while 1" performance.  */
             case LOAD_CONST:
-                cumlc = lastlc + 1;
+                CONST_STACK_PUSH_OP(i);
                 j = GETARG(codestr, i);
                 if (codestr[i+3] != POP_JUMP_IF_FALSE  ||
                     !ISBASICBLOCK(blocks,i,6)  ||
                     !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                     continue;
                 memset(codestr+i, NOP, 6);
-                cumlc = 0;
+                CONST_STACK_RESET();
                 break;

                 /* Try to fold tuples of constants (includes a case for lists and sets
@@ -458,19 +514,23 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             case BUILD_LIST:
             case BUILD_SET:
                 j = GETARG(codestr, i);
-                h = i - 3 * j;
-                if (h >= 0 &&
-                    j <= lastlc &&
+                if (j == 0)
+                    break;
+                h = CONST_STACK_OP_LASTN(j);
+                assert((h >= 0 || CONST_STACK_LEN() < j));
+                if (h >= 0 && j > 0 && j <= CONST_STACK_LEN() &&
                     ((opcode == BUILD_TUPLE &&
-                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
+                      ISBASICBLOCK(blocks, h, i-h+3)) ||
                      ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                       codestr[i+3]==COMPARE_OP &&
-                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
+                      ISBASICBLOCK(blocks, h, i-h+6) &&
                       (GETARG(codestr,i+3)==6 ||
                        GETARG(codestr,i+3)==7))) &&
-                    tuple_of_constants(&codestr[h], j, consts)) {
+                    tuple_of_constants(&codestr[i], j, consts, CONST_STACK_LASTN(j))) {
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    memset(&codestr[h], NOP, i - h);
+                    CONST_STACK_POP(j);
+                    CONST_STACK_PUSH_OP(i);
                     break;
                 }
                 if (codestr[i+3] != UNPACK_SEQUENCE  ||
@@ -482,10 +542,12 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                 } else if (j == 2) {
                     codestr[i] = ROT_TWO;
                     memset(codestr+i+1, NOP, 5);
+                    CONST_STACK_RESET();
                 } else if (j == 3) {
                     codestr[i] = ROT_THREE;
                     codestr[i+1] = ROT_TWO;
                     memset(codestr+i+2, NOP, 4);
+                    CONST_STACK_RESET();
                 }
                 break;
@@ -504,12 +566,18 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             case BINARY_AND:
             case BINARY_XOR:
             case BINARY_OR:
-                if (lastlc >= 2 &&
-                    ISBASICBLOCK(blocks, i-6, 7) &&
-                    fold_binops_on_constants(&codestr[i-6], consts)) {
+                /* NOTE: LOAD_CONST is saved at `i-2` since it has an arg
+                   while BINOP hasn't */
+                h = CONST_STACK_OP_LASTN(2);
+                assert((h >= 0 || CONST_STACK_LEN() < 2));
+                if (h >= 0 &&
+                    ISBASICBLOCK(blocks, h, i-h+1) &&
+                    fold_binops_on_constants(&codestr[i], consts, CONST_STACK_LASTN(2))) {
                     i -= 2;
+                    memset(&codestr[h], NOP, i - h);
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    CONST_STACK_POP(2);
+                    CONST_STACK_PUSH_OP(i);
                 }
                 break;
@@ -518,12 +586,15 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
             case UNARY_NEGATIVE:
             case UNARY_INVERT:
             case UNARY_POSITIVE:
-                if (lastlc >= 1 &&
-                    ISBASICBLOCK(blocks, i-3, 4) &&
-                    fold_unaryops_on_constants(&codestr[i-3], consts)) {
+                h = CONST_STACK_OP_LASTN(1);
+                assert((h >= 0 || CONST_STACK_LEN() < 1));
+                if (h >= 0 &&
+                    ISBASICBLOCK(blocks, h, i-h+1) &&
+                    fold_unaryops_on_constants(&codestr[i-3], consts, CONST_STACK_TOP())) {
                     i -= 2;
                     assert(codestr[i] == LOAD_CONST);
-                    cumlc = 1;
+                    CONST_STACK_POP(1);
+                    CONST_STACK_PUSH_OP(i);
                 }
                 break;
@@ -680,6 +751,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
     assert(h + nops == codelen);

     code = PyBytes_FromStringAndSize((char *)codestr, h);
+    CONST_STACK_DELETE();
     PyMem_Free(addrmap);
     PyMem_Free(codestr);
     PyMem_Free(blocks);
@@ -689,6 +761,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
     code = NULL;

  exitUnchanged:
+    CONST_STACK_DELETE();
    if (blocks != NULL)
        PyMem_Free(blocks);
    if (addrmap != NULL)
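Taken together, the peephole.c changes replace the old cumlc/lastlc counters with a small side stack (const_stack for the constant objects, load_const_stack for the offsets of the LOAD_CONSTs that produced them). Every successful fold NOPs out the consumed instructions, writes one new LOAD_CONST, and pushes that result back onto the stack so it can immediately take part in the next fold; that is what lets nested expressions collapse completely. A toy Python model of that loop (illustrative only; the (opcode, arg) list format, the fold_constants helper and the opcode subset are invented for this sketch, not CPython APIs):

    import operator

    BINOPS = {"BINARY_ADD": operator.add, "BINARY_MULTIPLY": operator.mul}
    UNARYOPS = {"UNARY_NEGATIVE": operator.neg}

    def fold_constants(instructions):
        """Fold a flat list of (opcode_name, arg) pairs, toy-style."""
        out = []      # rewritten instruction stream
        consts = []   # indices into `out` of the pending run of LOAD_CONSTs
        for op, arg in instructions:
            if op == "LOAD_CONST":
                consts.append(len(out))
                out.append((op, arg))
            elif op in UNARYOPS and consts:
                i = consts[-1]                      # fold onto the newest constant
                out[i] = ("LOAD_CONST", UNARYOPS[op](out[i][1]))
            elif op in BINOPS and len(consts) >= 2:
                j = consts.pop()                    # right operand
                i = consts[-1]                      # left operand: reuse its slot
                out[i] = ("LOAD_CONST", BINOPS[op](out[i][1], out[j][1]))
                del out[j]                          # the C code writes NOPs instead
            elif op == "BUILD_TUPLE" and 0 < arg <= len(consts):
                first = consts[-arg]
                value = tuple(out[k][1] for k in consts[-arg:])
                del consts[len(consts) - arg + 1:]  # keep one slot for the result
                del out[first + 1:]
                out[first] = ("LOAD_CONST", value)
            else:
                consts = []                         # anything else ends the run
                out.append((op, arg))
        return out

    # Roughly the unoptimized instruction sequence for "3 * -5"; the folder
    # collapses it to a single LOAD_CONST -15, as the patched pass now does.
    print(fold_constants([("LOAD_CONST", 3), ("LOAD_CONST", 5),
                          ("UNARY_NEGATIVE", None), ("BINARY_MULTIPLY", None)]))

The old counter-based logic could only fold when the operands were still literally adjacent LOAD_CONSTs in the bytecode; tracking folded results on the stack removes that restriction, which is what the commit message means by "arbitrarily complex expressions".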