mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
bpo-37500: Make sure dead code does not generate bytecode but also detect syntax errors (GH-14612)
https://bugs.python.org/issue37500
Add a new field to the compiler structure that can be configured so that
no bytecode is emitted. This makes it possible to detect errors by
walking the nodes while preserving optimizations.
https://bugs.python.org/issue37500
(cherry picked from commit 18c5f9d44d)
Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
parent
cf52bd0b9b
commit
9ea738e580
4 changed files with 174 additions and 18 deletions
|
@ -697,6 +697,40 @@ if 1:
|
||||||
# complex statements.
|
# complex statements.
|
||||||
compile("if a: b\n" * 200000, "<dummy>", "exec")
|
compile("if a: b\n" * 200000, "<dummy>", "exec")
|
||||||
|
|
||||||
|
# Multiple users rely on the fact that CPython does not generate
|
||||||
|
# bytecode for dead code blocks. See bpo-37500 for more context.
|
||||||
|
@support.cpython_only
|
||||||
|
def test_dead_blocks_do_not_generate_bytecode(self):
|
||||||
|
def unused_block_if():
|
||||||
|
if 0:
|
||||||
|
return 42
|
||||||
|
|
||||||
|
def unused_block_while():
|
||||||
|
while 0:
|
||||||
|
return 42
|
||||||
|
|
||||||
|
def unused_block_if_else():
|
||||||
|
if 1:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return 42
|
||||||
|
|
||||||
|
def unused_block_while_else():
|
||||||
|
while 1:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return 42
|
||||||
|
|
||||||
|
funcs = [unused_block_if, unused_block_while,
|
||||||
|
unused_block_if_else, unused_block_while_else]
|
||||||
|
|
||||||
|
for func in funcs:
|
||||||
|
opcodes = list(dis.get_instructions(func))
|
||||||
|
self.assertEqual(2, len(opcodes))
|
||||||
|
self.assertEqual('LOAD_CONST', opcodes[0].opname)
|
||||||
|
self.assertEqual(None, opcodes[0].argval)
|
||||||
|
self.assertEqual('RETURN_VALUE', opcodes[1].opname)
|
||||||
|
|
||||||
|
|
||||||
class TestExpressionStackSize(unittest.TestCase):
|
class TestExpressionStackSize(unittest.TestCase):
|
||||||
# These tests check that the computed stack size for a code object
|
# These tests check that the computed stack size for a code object
|
||||||
|
|
|
@ -697,18 +697,47 @@ class SyntaxTestCase(unittest.TestCase):
|
||||||
self._check_error("break", "outside loop")
|
self._check_error("break", "outside loop")
|
||||||
|
|
||||||
def test_yield_outside_function(self):
|
def test_yield_outside_function(self):
|
||||||
self._check_error("if 0: yield", "outside function")
|
self._check_error("if 0: yield", "outside function")
|
||||||
self._check_error("class C:\n if 0: yield", "outside function")
|
self._check_error("if 0: yield\nelse: x=1", "outside function")
|
||||||
|
self._check_error("if 1: pass\nelse: yield", "outside function")
|
||||||
|
self._check_error("while 0: yield", "outside function")
|
||||||
|
self._check_error("while 0: yield\nelse: x=1", "outside function")
|
||||||
|
self._check_error("class C:\n if 0: yield", "outside function")
|
||||||
|
self._check_error("class C:\n if 1: pass\n else: yield",
|
||||||
|
"outside function")
|
||||||
|
self._check_error("class C:\n while 0: yield", "outside function")
|
||||||
|
self._check_error("class C:\n while 0: yield\n else: x = 1",
|
||||||
|
"outside function")
|
||||||
|
|
||||||
def test_return_outside_function(self):
|
def test_return_outside_function(self):
|
||||||
self._check_error("if 0: return", "outside function")
|
self._check_error("if 0: return", "outside function")
|
||||||
self._check_error("class C:\n if 0: return", "outside function")
|
self._check_error("if 0: return\nelse: x=1", "outside function")
|
||||||
|
self._check_error("if 1: pass\nelse: return", "outside function")
|
||||||
|
self._check_error("while 0: return", "outside function")
|
||||||
|
self._check_error("class C:\n if 0: return", "outside function")
|
||||||
|
self._check_error("class C:\n while 0: return", "outside function")
|
||||||
|
self._check_error("class C:\n while 0: return\n else: x=1",
|
||||||
|
"outside function")
|
||||||
|
self._check_error("class C:\n if 0: return\n else: x= 1",
|
||||||
|
"outside function")
|
||||||
|
self._check_error("class C:\n if 1: pass\n else: return",
|
||||||
|
"outside function")
|
||||||
|
|
||||||
def test_break_outside_loop(self):
|
def test_break_outside_loop(self):
|
||||||
self._check_error("if 0: break", "outside loop")
|
self._check_error("if 0: break", "outside loop")
|
||||||
|
self._check_error("if 0: break\nelse: x=1", "outside loop")
|
||||||
|
self._check_error("if 1: pass\nelse: break", "outside loop")
|
||||||
|
self._check_error("class C:\n if 0: break", "outside loop")
|
||||||
|
self._check_error("class C:\n if 1: pass\n else: break",
|
||||||
|
"outside loop")
|
||||||
|
|
||||||
def test_continue_outside_loop(self):
|
def test_continue_outside_loop(self):
|
||||||
self._check_error("if 0: continue", "not properly in loop")
|
self._check_error("if 0: continue", "not properly in loop")
|
||||||
|
self._check_error("if 0: continue\nelse: x=1", "not properly in loop")
|
||||||
|
self._check_error("if 1: pass\nelse: continue", "not properly in loop")
|
||||||
|
self._check_error("class C:\n if 0: continue", "not properly in loop")
|
||||||
|
self._check_error("class C:\n if 1: pass\n else: continue",
|
||||||
|
"not properly in loop")
|
||||||
|
|
||||||
def test_unexpected_indent(self):
|
def test_unexpected_indent(self):
|
||||||
self._check_error("foo()\n bar()\n", "unexpected indent",
|
self._check_error("foo()\n bar()\n", "unexpected indent",
|
||||||
|
|
|
@ -53,22 +53,52 @@ basic.events = [(0, 'call'),
|
||||||
# following that clause?
|
# following that clause?
|
||||||
|
|
||||||
|
|
||||||
# The entire "while 0:" statement is optimized away. No code
|
# Some constructs like "while 0:", "if 0:" or "if 1:...else:..." are optimized
|
||||||
# exists for it, so the line numbers skip directly from "del x"
|
# away. No code exists for them, so the line numbers skip directly from
|
||||||
# to "x = 1".
|
# "del x" to "x = 1".
|
||||||
def arigo_example():
|
def arigo_example0():
|
||||||
x = 1
|
x = 1
|
||||||
del x
|
del x
|
||||||
while 0:
|
while 0:
|
||||||
pass
|
pass
|
||||||
x = 1
|
x = 1
|
||||||
|
|
||||||
arigo_example.events = [(0, 'call'),
|
arigo_example0.events = [(0, 'call'),
|
||||||
(1, 'line'),
|
(1, 'line'),
|
||||||
(2, 'line'),
|
(2, 'line'),
|
||||||
(5, 'line'),
|
(5, 'line'),
|
||||||
(5, 'return')]
|
(5, 'return')]
|
||||||
|
|
||||||
|
def arigo_example1():
|
||||||
|
x = 1
|
||||||
|
del x
|
||||||
|
if 0:
|
||||||
|
pass
|
||||||
|
x = 1
|
||||||
|
|
||||||
|
arigo_example1.events = [(0, 'call'),
|
||||||
|
(1, 'line'),
|
||||||
|
(2, 'line'),
|
||||||
|
(5, 'line'),
|
||||||
|
(5, 'return')]
|
||||||
|
|
||||||
|
def arigo_example2():
|
||||||
|
x = 1
|
||||||
|
del x
|
||||||
|
if 1:
|
||||||
|
x = 1
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
arigo_example2.events = [(0, 'call'),
|
||||||
|
(1, 'line'),
|
||||||
|
(2, 'line'),
|
||||||
|
(4, 'line'),
|
||||||
|
(7, 'line'),
|
||||||
|
(7, 'return')]
|
||||||
|
|
||||||
|
|
||||||
# check that lines consisting of just one instruction get traced:
|
# check that lines consisting of just one instruction get traced:
|
||||||
def one_instr_line():
|
def one_instr_line():
|
||||||
x = 1
|
x = 1
|
||||||
|
@ -349,8 +379,12 @@ class TraceTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_01_basic(self):
|
def test_01_basic(self):
|
||||||
self.run_test(basic)
|
self.run_test(basic)
|
||||||
def test_02_arigo(self):
|
def test_02_arigo0(self):
|
||||||
self.run_test(arigo_example)
|
self.run_test(arigo_example0)
|
||||||
|
def test_02_arigo1(self):
|
||||||
|
self.run_test(arigo_example1)
|
||||||
|
def test_02_arigo2(self):
|
||||||
|
self.run_test(arigo_example2)
|
||||||
def test_03_one_instr(self):
|
def test_03_one_instr(self):
|
||||||
self.run_test(one_instr_line)
|
self.run_test(one_instr_line)
|
||||||
def test_04_no_pop_blocks(self):
|
def test_04_no_pop_blocks(self):
|
||||||
|
|
|
@ -161,6 +161,11 @@ struct compiler {
|
||||||
int c_optimize; /* optimization level */
|
int c_optimize; /* optimization level */
|
||||||
int c_interactive; /* true if in interactive mode */
|
int c_interactive; /* true if in interactive mode */
|
||||||
int c_nestlevel;
|
int c_nestlevel;
|
||||||
|
int c_do_not_emit_bytecode; /* The compiler won't emit any bytecode
|
||||||
|
if this value is different from zero.
|
||||||
|
This can be used to temporarily visit
|
||||||
|
nodes without emitting bytecode to
|
||||||
|
check only for errors. */
|
||||||
|
|
||||||
PyObject *c_const_cache; /* Python dict holding all constants,
|
PyObject *c_const_cache; /* Python dict holding all constants,
|
||||||
including names tuple */
|
including names tuple */
|
||||||
|
@ -340,6 +345,7 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
|
||||||
c.c_flags = flags;
|
c.c_flags = flags;
|
||||||
c.c_optimize = (optimize == -1) ? config->optimization_level : optimize;
|
c.c_optimize = (optimize == -1) ? config->optimization_level : optimize;
|
||||||
c.c_nestlevel = 0;
|
c.c_nestlevel = 0;
|
||||||
|
c.c_do_not_emit_bytecode = 0;
|
||||||
|
|
||||||
if (!_PyAST_Optimize(mod, arena, c.c_optimize)) {
|
if (!_PyAST_Optimize(mod, arena, c.c_optimize)) {
|
||||||
goto finally;
|
goto finally;
|
||||||
|
@ -1152,6 +1158,9 @@ compiler_addop(struct compiler *c, int opcode)
|
||||||
struct instr *i;
|
struct instr *i;
|
||||||
int off;
|
int off;
|
||||||
assert(!HAS_ARG(opcode));
|
assert(!HAS_ARG(opcode));
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
off = compiler_next_instr(c, c->u->u_curblock);
|
off = compiler_next_instr(c, c->u->u_curblock);
|
||||||
if (off < 0)
|
if (off < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1305,6 +1314,10 @@ merge_consts_recursive(struct compiler *c, PyObject *o)
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
compiler_add_const(struct compiler *c, PyObject *o)
|
compiler_add_const(struct compiler *c, PyObject *o)
|
||||||
{
|
{
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *key = merge_consts_recursive(c, o);
|
PyObject *key = merge_consts_recursive(c, o);
|
||||||
if (key == NULL) {
|
if (key == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -1318,6 +1331,10 @@ compiler_add_const(struct compiler *c, PyObject *o)
|
||||||
static int
|
static int
|
||||||
compiler_addop_load_const(struct compiler *c, PyObject *o)
|
compiler_addop_load_const(struct compiler *c, PyObject *o)
|
||||||
{
|
{
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
Py_ssize_t arg = compiler_add_const(c, o);
|
Py_ssize_t arg = compiler_add_const(c, o);
|
||||||
if (arg < 0)
|
if (arg < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1328,6 +1345,10 @@ static int
|
||||||
compiler_addop_o(struct compiler *c, int opcode, PyObject *dict,
|
compiler_addop_o(struct compiler *c, int opcode, PyObject *dict,
|
||||||
PyObject *o)
|
PyObject *o)
|
||||||
{
|
{
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
Py_ssize_t arg = compiler_add_o(c, dict, o);
|
Py_ssize_t arg = compiler_add_o(c, dict, o);
|
||||||
if (arg < 0)
|
if (arg < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1339,6 +1360,11 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict,
|
||||||
PyObject *o)
|
PyObject *o)
|
||||||
{
|
{
|
||||||
Py_ssize_t arg;
|
Py_ssize_t arg;
|
||||||
|
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *mangled = _Py_Mangle(c->u->u_private, o);
|
PyObject *mangled = _Py_Mangle(c->u->u_private, o);
|
||||||
if (!mangled)
|
if (!mangled)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1359,6 +1385,10 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
|
||||||
struct instr *i;
|
struct instr *i;
|
||||||
int off;
|
int off;
|
||||||
|
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* oparg value is unsigned, but a signed C int is usually used to store
|
/* oparg value is unsigned, but a signed C int is usually used to store
|
||||||
it in the C code (like Python/ceval.c).
|
it in the C code (like Python/ceval.c).
|
||||||
|
|
||||||
|
@ -1385,6 +1415,10 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
|
||||||
struct instr *i;
|
struct instr *i;
|
||||||
int off;
|
int off;
|
||||||
|
|
||||||
|
if (c->c_do_not_emit_bytecode) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
assert(HAS_ARG(opcode));
|
assert(HAS_ARG(opcode));
|
||||||
assert(b != NULL);
|
assert(b != NULL);
|
||||||
off = compiler_next_instr(c, c->u->u_curblock);
|
off = compiler_next_instr(c, c->u->u_curblock);
|
||||||
|
@ -1519,6 +1553,17 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* These macros allow checking only for errors without emitting bytecode
|
||||||
|
* while visiting nodes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define BEGIN_DO_NOT_EMIT_BYTECODE { \
|
||||||
|
c->c_do_not_emit_bytecode++;
|
||||||
|
|
||||||
|
#define END_DO_NOT_EMIT_BYTECODE \
|
||||||
|
c->c_do_not_emit_bytecode--; \
|
||||||
|
}
|
||||||
|
|
||||||
/* Search if variable annotations are present statically in a block. */
|
/* Search if variable annotations are present statically in a block. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -2546,13 +2591,23 @@ compiler_if(struct compiler *c, stmt_ty s)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
constant = expr_constant(s->v.If.test);
|
constant = expr_constant(s->v.If.test);
|
||||||
/* constant = 0: "if 0" Leave the optimizations to
|
/* constant = 0: "if 0"
|
||||||
* the peephole optimizer to check for syntax errors
|
|
||||||
* in the block.
|
|
||||||
* constant = 1: "if 1", "if 2", ...
|
* constant = 1: "if 1", "if 2", ...
|
||||||
* constant = -1: rest */
|
* constant = -1: rest */
|
||||||
if (constant == 1) {
|
if (constant == 0) {
|
||||||
|
BEGIN_DO_NOT_EMIT_BYTECODE
|
||||||
VISIT_SEQ(c, stmt, s->v.If.body);
|
VISIT_SEQ(c, stmt, s->v.If.body);
|
||||||
|
END_DO_NOT_EMIT_BYTECODE
|
||||||
|
if (s->v.If.orelse) {
|
||||||
|
VISIT_SEQ(c, stmt, s->v.If.orelse);
|
||||||
|
}
|
||||||
|
} else if (constant == 1) {
|
||||||
|
VISIT_SEQ(c, stmt, s->v.If.body);
|
||||||
|
if (s->v.If.orelse) {
|
||||||
|
BEGIN_DO_NOT_EMIT_BYTECODE
|
||||||
|
VISIT_SEQ(c, stmt, s->v.If.orelse);
|
||||||
|
END_DO_NOT_EMIT_BYTECODE
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (asdl_seq_LEN(s->v.If.orelse)) {
|
if (asdl_seq_LEN(s->v.If.orelse)) {
|
||||||
next = compiler_new_block(c);
|
next = compiler_new_block(c);
|
||||||
|
@ -2662,8 +2717,12 @@ compiler_while(struct compiler *c, stmt_ty s)
|
||||||
int constant = expr_constant(s->v.While.test);
|
int constant = expr_constant(s->v.While.test);
|
||||||
|
|
||||||
if (constant == 0) {
|
if (constant == 0) {
|
||||||
if (s->v.While.orelse)
|
BEGIN_DO_NOT_EMIT_BYTECODE
|
||||||
|
VISIT_SEQ(c, stmt, s->v.While.body);
|
||||||
|
END_DO_NOT_EMIT_BYTECODE
|
||||||
|
if (s->v.While.orelse) {
|
||||||
VISIT_SEQ(c, stmt, s->v.While.orelse);
|
VISIT_SEQ(c, stmt, s->v.While.orelse);
|
||||||
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
loop = compiler_new_block(c);
|
loop = compiler_new_block(c);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue