gh-126835: Move const folding of lists & sets from ast_opt.c to flowgraph.c (#130032)

This commit is contained in:
Yan Yanchii 2025-02-13 13:11:07 +01:00 committed by GitHub
parent c7a9d06e06
commit 140e69c4a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 231 additions and 120 deletions

View file

@ -3239,46 +3239,6 @@ class ASTOptimiziationTests(unittest.TestCase):
self.assert_ast(code, non_optimized_target, optimized_target) self.assert_ast(code, non_optimized_target, optimized_target)
def test_folding_comparator(self):
# Covers "in" / "not in" against a one-element list or set literal.
# For every combination, the un-optimized tree keeps the List/Set node
# as the comparator while the optimized tree carries a Constant holding
# a tuple (for a list) or a frozenset (for a set).
code = "1 %s %s1%s"
operators = [("in", ast.In()), ("not in", ast.NotIn())]
# Each row: (opening brace, closing brace, literal node class, folded value).
braces = [
("[", "]", ast.List, (1,)),
("{", "}", ast.Set, frozenset({1})),
]
for left, right, non_optimized_comparator, optimized_comparator in braces:
for op, node in operators:
non_optimized_target = self.wrap_expr(ast.Compare(
left=ast.Constant(1), ops=[node],
comparators=[non_optimized_comparator(elts=[ast.Constant(1)])]
))
optimized_target = self.wrap_expr(ast.Compare(
left=ast.Constant(1), ops=[node],
comparators=[ast.Constant(value=optimized_comparator)]
))
# The template is filled as (operator, opening brace, closing brace),
# e.g. "1 in [1]" or "1 not in {1}".
self.assert_ast(code % (op, left, right), non_optimized_target, optimized_target)
def test_folding_iter(self):
# Covers a "for" loop whose iterable is a one-element list or set
# literal. The optimized tree is expected to hold a Constant tuple
# (for a list) or a Constant frozenset (for a set) as the iterable.
code = "for _ in %s1%s: pass"
# Each row: (opening brace, closing brace, literal node class, folded value).
braces = [
("[", "]", ast.List, (1,)),
("{", "}", ast.Set, frozenset({1})),
]
for left, right, ast_cls, optimized_iter in braces:
non_optimized_target = self.wrap_statement(ast.For(
target=ast.Name(id="_", ctx=ast.Store()),
iter=ast_cls(elts=[ast.Constant(1)]),
body=[ast.Pass()]
))
optimized_target = self.wrap_statement(ast.For(
target=ast.Name(id="_", ctx=ast.Store()),
iter=ast.Constant(value=optimized_iter),
body=[ast.Pass()]
))
# The template is filled as (opening brace, closing brace),
# e.g. "for _ in [1]: pass".
self.assert_ast(code % (left, right), non_optimized_target, optimized_target)
def test_folding_type_param_in_function_def(self): def test_folding_type_param_in_function_def(self):
code = "def foo[%s = 1 + 1](): pass" code = "def foo[%s = 1 + 1](): pass"

View file

@ -798,7 +798,7 @@ class TestSpecifics(unittest.TestCase):
f3 = lambda x: x in {("not a name",)} f3 = lambda x: x in {("not a name",)}
self.assertIs(f1.__code__.co_consts[0], self.assertIs(f1.__code__.co_consts[0],
f2.__code__.co_consts[0][0]) f2.__code__.co_consts[0][0])
self.assertIs(next(iter(f3.__code__.co_consts[0])), self.assertIs(next(iter(f3.__code__.co_consts[1])),
f2.__code__.co_consts[0]) f2.__code__.co_consts[0])
# {0} is converted to a constant frozenset({0}) by the peephole # {0} is converted to a constant frozenset({0}) by the peephole

View file

@ -1261,6 +1261,202 @@ class DirectCfgOptimizerTests(CfgOptimizationTestCase):
] ]
self.cfg_optimization_test(same, same, consts=[]) self.cfg_optimization_test(same, same, consts=[])
def test_optimize_literal_list_for_iter(self):
# Case 1: every element is constant.
# for _ in [1, 2]: pass ==> for _ in (1, 2): pass
# The two element loads and BUILD_LIST in front of GET_ITER are expected
# to collapse into one LOAD_CONST of a new constant at index 1.
before = [
('LOAD_SMALL_INT', 1, 0),
('LOAD_SMALL_INT', 2, 0),
('BUILD_LIST', 2, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_CONST', 1, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
# The folded tuple (1, 2) is expected to be appended after the initial None.
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None, (1, 2)])
# Case 2: one element is a name, so no constant can be built.
# for _ in [1, x]: pass ==> for _ in (1, x): pass
# Only the build opcode changes (BUILD_LIST -> BUILD_TUPLE); the element
# loads stay and the constants list is expected to be unchanged.
before = [
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 0, 0),
('BUILD_LIST', 2, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 0, 0),
('BUILD_TUPLE', 2, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None])
def test_optimize_literal_set_for_iter(self):
# Case 1: every element is constant.
# for _ in {1, 2}: pass ==> for _ in frozenset({1, 2}): pass
# The two element loads and BUILD_SET in front of GET_ITER are expected
# to collapse into one LOAD_CONST of a new constant at index 1.
before = [
('LOAD_SMALL_INT', 1, 0),
('LOAD_SMALL_INT', 2, 0),
('BUILD_SET', 2, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_CONST', 1, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
# The folded constant is a frozenset (not a tuple), appended after None.
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None, frozenset({1, 2})])
# Case 2: a set with a non-constant element is left untouched; unlike the
# list case, BUILD_SET is not rewritten to a different build opcode.
# non constant literal set is not changed
# for _ in {1, x}: pass ==> for _ in {1, x}: pass
same = [
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 0, 0),
('BUILD_SET', 2, 0),
('GET_ITER', None, 0),
start := self.Label(),
('FOR_ITER', end := self.Label(), 0),
('STORE_FAST', 0, 0),
('JUMP', start, 0),
end,
('END_FOR', None, 0),
('POP_ITER', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
self.cfg_optimization_test(same, same, consts=[None], expected_consts=[None])
def test_optimize_literal_list_contains(self):
# Case 1: every element is constant.
# x in [1, 2] ==> x in (1, 2)
# The element loads and BUILD_LIST in front of CONTAINS_OP are expected
# to collapse into one LOAD_CONST of a new constant at index 1.
before = [
('LOAD_NAME', 0, 0),
('LOAD_SMALL_INT', 1, 0),
('LOAD_SMALL_INT', 2, 0),
('BUILD_LIST', 2, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_NAME', 0, 0),
('LOAD_CONST', 1, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
# The folded tuple (1, 2) is expected to be appended after the initial None.
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None, (1, 2)])
# Case 2: one element is a name, so no constant can be built.
# x in [1, y] ==> x in (1, y)
# Only the build opcode changes (BUILD_LIST -> BUILD_TUPLE); the
# constants list is expected to be unchanged.
before = [
('LOAD_NAME', 0, 0),
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 1, 0),
('BUILD_LIST', 2, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_NAME', 0, 0),
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 1, 0),
('BUILD_TUPLE', 2, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None])
def test_optimize_literal_set_contains(self):
# Case 1: every element is constant.
# x in {1, 2} ==> x in frozenset({1, 2})
# The element loads and BUILD_SET in front of CONTAINS_OP are expected
# to collapse into one LOAD_CONST of a new constant at index 1.
before = [
('LOAD_NAME', 0, 0),
('LOAD_SMALL_INT', 1, 0),
('LOAD_SMALL_INT', 2, 0),
('BUILD_SET', 2, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
after = [
('LOAD_NAME', 0, 0),
('LOAD_CONST', 1, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
# The folded constant is a frozenset (not a tuple), appended after None.
self.cfg_optimization_test(before, after, consts=[None], expected_consts=[None, frozenset({1, 2})])
# Case 2: a set with a non-constant element is left untouched; unlike the
# list case, BUILD_SET is not rewritten to a different build opcode.
# non constant literal set is not changed
# x in {1, y} ==> x in {1, y}
same = [
('LOAD_NAME', 0, 0),
('LOAD_SMALL_INT', 1, 0),
('LOAD_NAME', 1, 0),
('BUILD_SET', 2, 0),
('CONTAINS_OP', 0, 0),
('POP_TOP', None, 0),
('LOAD_CONST', 0, 0),
('RETURN_VALUE', None, 0),
]
self.cfg_optimization_test(same, same, consts=[None], expected_consts=[None])
def test_conditional_jump_forward_const_condition(self): def test_conditional_jump_forward_const_condition(self):
# The unreachable branch of the jump is removed, the jump # The unreachable branch of the jump is removed, the jump

View file

@ -567,62 +567,6 @@ fold_tuple(expr_ty node, PyArena *arena, _PyASTOptimizeState *state)
return make_const(node, newval, arena); return make_const(node, newval, arena);
} }
/* Change literal list or set of constants into constant
tuple or frozenset respectively. Change literal list of
non-constants into tuple.
Used for right operand of "in" and "not in" tests and for iterable
in "for" loop and comprehensions.

The node is rewritten in place. Returns 1 without touching the node
when it is neither a List nor a Set, or when it is a List containing a
starred element; otherwise returns whatever make_const() returns.
The `state` argument is not used by this function.
*/
static int
fold_iter(expr_ty arg, PyArena *arena, _PyASTOptimizeState *state)
{
PyObject *newval;
if (arg->kind == List_kind) {
/* First change a list into tuple. */
asdl_expr_seq *elts = arg->v.List.elts;
if (has_starred(elts)) {
return 1;
}
/* Read ctx before changing `kind`, then store the saved fields
   through the Tuple member. This retagging happens before the
   constant attempt below, so a list of non-constants still ends up
   as a Tuple node. */
expr_context_ty ctx = arg->v.List.ctx;
arg->kind = Tuple_kind;
arg->v.Tuple.elts = elts;
arg->v.Tuple.ctx = ctx;
/* Try to create a constant tuple. */
newval = make_const_tuple(elts);
}
else if (arg->kind == Set_kind) {
/* Build a tuple of the elements first, then replace it with a
   frozenset built from that tuple. */
newval = make_const_tuple(arg->v.Set.elts);
if (newval) {
Py_SETREF(newval, PyFrozenSet_New(newval));
}
}
else {
return 1;
}
/* NOTE(review): newval may be NULL here (elements not all constant, or
   frozenset creation failed); make_const() presumably handles that case --
   confirm against its definition. */
return make_const(arg, newval, arena);
}
/* Fold the right-hand operand of a trailing "in" / "not in" comparison.
   Only the last operator of the Compare node and its matching comparator
   are examined. Returns 0 if fold_iter() reports failure (returns 0),
   and 1 in every other case, including when the last operator is not a
   membership test. */
static int
fold_compare(expr_ty node, PyArena *arena, _PyASTOptimizeState *state)
{
asdl_int_seq *ops;
asdl_expr_seq *args;
Py_ssize_t i;
ops = node->v.Compare.ops;
args = node->v.Compare.comparators;
/* Change literal list or set in 'in' or 'not in' into
tuple or frozenset respectively. */
/* i indexes the last operator; the same index selects its comparator. */
i = asdl_seq_LEN(ops) - 1;
int op = asdl_seq_GET(ops, i);
if (op == In || op == NotIn) {
if (!fold_iter((expr_ty)asdl_seq_GET(args, i), arena, state)) {
return 0;
}
}
return 1;
}
static int astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); static int astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state);
static int astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); static int astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state);
static int astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); static int astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state);
@ -783,7 +727,6 @@ astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
case Compare_kind: case Compare_kind:
CALL(astfold_expr, expr_ty, node_->v.Compare.left); CALL(astfold_expr, expr_ty, node_->v.Compare.left);
CALL_SEQ(astfold_expr, expr, node_->v.Compare.comparators); CALL_SEQ(astfold_expr, expr, node_->v.Compare.comparators);
CALL(fold_compare, expr_ty, node_);
break; break;
case Call_kind: case Call_kind:
CALL(astfold_expr, expr_ty, node_->v.Call.func); CALL(astfold_expr, expr_ty, node_->v.Call.func);
@ -852,8 +795,6 @@ astfold_comprehension(comprehension_ty node_, PyArena *ctx_, _PyASTOptimizeState
CALL(astfold_expr, expr_ty, node_->target); CALL(astfold_expr, expr_ty, node_->target);
CALL(astfold_expr, expr_ty, node_->iter); CALL(astfold_expr, expr_ty, node_->iter);
CALL_SEQ(astfold_expr, expr, node_->ifs); CALL_SEQ(astfold_expr, expr, node_->ifs);
CALL(fold_iter, expr_ty, node_->iter);
return 1; return 1;
} }
@ -940,8 +881,6 @@ astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state)
CALL(astfold_expr, expr_ty, node_->v.For.iter); CALL(astfold_expr, expr_ty, node_->v.For.iter);
CALL_SEQ(astfold_stmt, stmt, node_->v.For.body); CALL_SEQ(astfold_stmt, stmt, node_->v.For.body);
CALL_SEQ(astfold_stmt, stmt, node_->v.For.orelse); CALL_SEQ(astfold_stmt, stmt, node_->v.For.orelse);
CALL(fold_iter, expr_ty, node_->v.For.iter);
break; break;
case AsyncFor_kind: case AsyncFor_kind:
CALL(astfold_expr, expr_ty, node_->v.AsyncFor.target); CALL(astfold_expr, expr_ty, node_->v.AsyncFor.target);

View file

@ -1428,31 +1428,41 @@ fold_tuple_of_constants(basicblock *bb, int n, PyObject *consts, PyObject *const
} }
#define MIN_CONST_SEQUENCE_SIZE 3 #define MIN_CONST_SEQUENCE_SIZE 3
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N /*
with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1, Optimize lists and sets for:
or BUILD_SET & SET_UPDATE respectively. 1. "for" loop, comprehension or "in"/"not in" tests:
Change literal list or set of constants into constant
tuple or frozenset respectively. Change list of
non-constants into tuple.
2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
or BUILD_SET & SET_UPDATE respectively.
*/ */
static int static int
optimize_if_const_list_or_set(basicblock *bb, int n, PyObject *consts, PyObject *const_cache) optimize_lists_and_sets(basicblock *bb, int i, int nextop,
PyObject *consts, PyObject *const_cache)
{ {
assert(PyDict_CheckExact(const_cache)); assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts)); assert(PyList_CheckExact(consts));
cfg_instr *instr = &bb->b_instr[n]; cfg_instr *instr = &bb->b_instr[i];
assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET); assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
int seq_size = instr->i_oparg; int seq_size = instr->i_oparg;
if (seq_size < MIN_CONST_SEQUENCE_SIZE) { if (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter) {
return SUCCESS; return SUCCESS;
} }
PyObject *newconst; PyObject *newconst;
RETURN_IF_ERROR(get_constant_sequence(bb, n-1, seq_size, consts, &newconst)); RETURN_IF_ERROR(get_constant_sequence(bb, i-1, seq_size, consts, &newconst));
if (newconst == NULL) { if (newconst == NULL) { /* not a const sequence */
/* not a const sequence */ if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
/* iterate over a tuple instead of list */
INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
}
return SUCCESS; return SUCCESS;
} }
assert(PyTuple_CheckExact(newconst) && PyTuple_GET_SIZE(newconst) == seq_size); assert(PyTuple_CheckExact(newconst) && PyTuple_GET_SIZE(newconst) == seq_size);
int build = instr->i_opcode; if (instr->i_opcode == BUILD_SET) {
int extend = build == BUILD_LIST ? LIST_EXTEND : SET_UPDATE;
if (build == BUILD_SET) {
PyObject *frozenset = PyFrozenSet_New(newconst); PyObject *frozenset = PyFrozenSet_New(newconst);
if (frozenset == NULL) { if (frozenset == NULL) {
Py_DECREF(newconst); Py_DECREF(newconst);
@ -1462,11 +1472,17 @@ optimize_if_const_list_or_set(basicblock *bb, int n, PyObject *consts, PyObject
} }
int index = add_const(newconst, consts, const_cache); int index = add_const(newconst, consts, const_cache);
RETURN_IF_ERROR(index); RETURN_IF_ERROR(index);
nop_out(bb, n-1, seq_size); nop_out(bb, i-1, seq_size);
assert(n >= 2); if (contains_or_iter) {
INSTR_SET_OP1(&bb->b_instr[n-2], build, 0); INSTR_SET_OP1(instr, LOAD_CONST, index);
INSTR_SET_OP1(&bb->b_instr[n-1], LOAD_CONST, index); }
INSTR_SET_OP1(&bb->b_instr[n], extend, 1); else {
assert(i >= 2);
assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
}
return SUCCESS; return SUCCESS;
} }
@ -1923,7 +1939,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
break; break;
case BUILD_LIST: case BUILD_LIST:
case BUILD_SET: case BUILD_SET:
RETURN_IF_ERROR(optimize_if_const_list_or_set(bb, i, consts, const_cache)); RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
break; break;
case POP_JUMP_IF_NOT_NONE: case POP_JUMP_IF_NOT_NONE:
case POP_JUMP_IF_NONE: case POP_JUMP_IF_NONE: