Mirror of https://github.com/python/cpython.git
Synced 2025-09-26 18:29:57 +00:00
gh-97912: Avoid quadratic behavior when adding LOAD_FAST_CHECK (GH-97952)
* The compiler analyzes the usage of the first 64 local variables all at once using bit masks.
* Local variables beyond the first 64 are only partially analyzed, achieving linear time.
This commit is contained in:
parent 6f15ca8c7a
commit 39bc70e267
3 changed files with 176 additions and 65 deletions
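
As background for the diffs below: whether a local-variable load compiles to LOAD_FAST or to the checked LOAD_FAST_CHECK is visible from Python through the dis module on an interpreter that has this opcode (CPython 3.12 and later). The following is a minimal illustrative sketch, not part of the commit; the function `example` is made up:

    import dis

    def example(flag):
        if flag:
            x = 1
        # x may be unbound when flag is false, so this load cannot be a plain LOAD_FAST
        return x

    # On an interpreter with the checked opcode, the final load of `x` is expected
    # to show up as LOAD_FAST_CHECK, while provably-initialized loads stay LOAD_FAST.
    for ins in dis.get_instructions(example):
        if ins.opname in ("LOAD_FAST", "LOAD_FAST_CHECK"):
            print(ins.opname, ins.argval)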
@@ -776,6 +776,45 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
         self.assertInBytecode(f, 'LOAD_FAST_CHECK')
         self.assertNotInBytecode(f, 'LOAD_FAST')
 
+    def test_load_fast_too_many_locals(self):
+        # When there get to be too many locals to analyze completely,
+        # later locals are all converted to LOAD_FAST_CHECK, except
+        # when a store or prior load occurred in the same basicblock.
+        def f():
+            a00 = a01 = a02 = a03 = a04 = a05 = a06 = a07 = a08 = a09 = 1
+            a10 = a11 = a12 = a13 = a14 = a15 = a16 = a17 = a18 = a19 = 1
+            a20 = a21 = a22 = a23 = a24 = a25 = a26 = a27 = a28 = a29 = 1
+            a30 = a31 = a32 = a33 = a34 = a35 = a36 = a37 = a38 = a39 = 1
+            a40 = a41 = a42 = a43 = a44 = a45 = a46 = a47 = a48 = a49 = 1
+            a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = 1
+            a60 = a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = 1
+            a70 = a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = 1
+            del a72, a73
+            print(a73)
+            print(a70, a71, a72, a73)
+            while True:
+                print(a00, a01, a62, a63)
+                print(a64, a65, a78, a79)
+
+        for i in 0, 1, 62, 63:
+            # First 64 locals: analyze completely
+            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+        for i in 64, 65, 78, 79:
+            # Locals >=64 not in the same basicblock
+            self.assertInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+        for i in 70, 71:
+            # Locals >=64 in the same basicblock
+            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+        # del statements should invalidate within basicblocks.
+        self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72")
+        self.assertNotInBytecode(f, 'LOAD_FAST', "a72")
+        # previous checked loads within a basicblock enable unchecked loads
+        self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73")
+        self.assertInBytecode(f, 'LOAD_FAST', "a73")
+
     def test_setting_lineno_adds_check(self):
         code = textwrap.dedent("""\
             def f():
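
The behavior asserted above can also be checked by hand with dis.get_instructions, which exposes the same opcode and argument information that the BytecodeTestCase assertions inspect. A small sketch under that assumption; the helper `uses_opcode` and the function `g` are invented for illustration:

    import dis

    def uses_opcode(func, opname, argval=None):
        # True if any instruction of func has the given opcode
        # (and, optionally, refers to the given local variable).
        return any(ins.opname == opname and (argval is None or ins.argval == argval)
                   for ins in dis.get_instructions(func))

    def g():
        x = 1
        del x
        print(x)  # the del makes this load unsafe within the same basicblock

    # Expected on a build that includes this change:
    # uses_opcode(g, 'LOAD_FAST_CHECK', 'x') -> True
    # uses_opcode(g, 'LOAD_FAST', 'x')       -> False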
@@ -0,0 +1 @@
+The compiler now avoids quadratic behavior when finding which instructions should use the :opcode:`LOAD_FAST_CHECK` opcode.
193 Python/compile.c
@@ -114,6 +114,13 @@
         (opcode) == RAISE_VARARGS || \
         (opcode) == RERAISE)
 
+#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
+    ((opcode) == LOAD_FAST__LOAD_FAST || \
+     (opcode) == LOAD_FAST__LOAD_CONST || \
+     (opcode) == LOAD_CONST__LOAD_FAST || \
+     (opcode) == STORE_FAST__LOAD_FAST || \
+     (opcode) == STORE_FAST__STORE_FAST)
+
 #define IS_TOP_LEVEL_AWAIT(c) ( \
         (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
         && (c->u->u_ste->ste_type == ModuleBlock))
@@ -258,6 +265,8 @@ typedef struct basicblock_ {
     int b_iused;
     /* length of instruction array (b_instr) */
     int b_ialloc;
+    /* Used by add_checks_for_loads_of_unknown_variables */
+    uint64_t b_unsafe_locals_mask;
     /* Number of predecessors that a block has. */
     int b_predecessors;
     /* depth of stack upon entry of block, computed by stackdepth() */
@@ -8052,103 +8061,165 @@ assemble_jump_offsets(basicblock *entryblock)
 }
 
-// Ensure each basicblock is only put onto the stack once.
-#define MAYBE_PUSH(B) do { \
-        if ((B)->b_visited == 0) { \
-            *(*stack_top)++ = (B); \
-            (B)->b_visited = 1; \
-        } \
-    } while (0)
+// helper functions for add_checks_for_loads_of_unknown_variables
+static inline void
+maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
+{
+    // Push b if the unsafe mask is giving us any new information.
+    // To avoid overflowing the stack, only allow each block once.
+    // Use b->b_visited=1 to mean that b is currently on the stack.
+    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
+    if (b->b_unsafe_locals_mask != both) {
+        b->b_unsafe_locals_mask = both;
+        // More work left to do.
+        if (!b->b_visited) {
+            // not on the stack, so push it.
+            *(*sp)++ = b;
+            b->b_visited = 1;
+        }
+    }
+}
 
 static void
-scan_block_for_local(int target, basicblock *b, bool unsafe_to_start,
-                     basicblock ***stack_top)
+scan_block_for_locals(basicblock *b, basicblock ***sp)
 {
-    bool unsafe = unsafe_to_start;
+    // bit i is set if local i is potentially uninitialized
+    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
     for (int i = 0; i < b->b_iused; i++) {
         struct instr *instr = &b->b_instr[i];
         assert(instr->i_opcode != EXTENDED_ARG);
         assert(instr->i_opcode != EXTENDED_ARG_QUICK);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__LOAD_FAST);
-        assert(instr->i_opcode != LOAD_CONST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__STORE_FAST);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_CONST);
-        if (unsafe && instr->i_except != NULL) {
-            MAYBE_PUSH(instr->i_except);
+        assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+        if (instr->i_except != NULL) {
+            maybe_push(instr->i_except, unsafe_mask, sp);
         }
-        if (instr->i_oparg != target) {
+        if (instr->i_oparg >= 64) {
             continue;
         }
+        assert(instr->i_oparg >= 0);
+        uint64_t bit = (uint64_t)1 << instr->i_oparg;
         switch (instr->i_opcode) {
-            case LOAD_FAST_CHECK:
-                // if this doesn't raise, then var is defined
-                unsafe = false;
-                break;
-            case LOAD_FAST:
-                if (unsafe) {
-                    instr->i_opcode = LOAD_FAST_CHECK;
-                }
-                unsafe = false;
+            case DELETE_FAST:
+                unsafe_mask |= bit;
                 break;
             case STORE_FAST:
-                unsafe = false;
+                unsafe_mask &= ~bit;
                 break;
-            case DELETE_FAST:
-                unsafe = true;
+            case LOAD_FAST_CHECK:
+                // If this doesn't raise, then the local is defined.
+                unsafe_mask &= ~bit;
+                break;
+            case LOAD_FAST:
+                if (unsafe_mask & bit) {
+                    instr->i_opcode = LOAD_FAST_CHECK;
+                }
+                unsafe_mask &= ~bit;
                 break;
         }
     }
-    if (unsafe) {
-        // unsafe at end of this block,
-        // so unsafe at start of next blocks
-        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
-            MAYBE_PUSH(b->b_next);
-        }
-        struct instr *last = basicblock_last_instr(b);
-        if (last != NULL) {
-            if (is_jump(last)) {
-                assert(last->i_target != NULL);
-                MAYBE_PUSH(last->i_target);
-            }
-        }
-    }
-}
-#undef MAYBE_PUSH
+    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
+        maybe_push(b->b_next, unsafe_mask, sp);
+    }
+    struct instr *last = basicblock_last_instr(b);
+    if (last && is_jump(last)) {
+        assert(last->i_target != NULL);
+        maybe_push(last->i_target, unsafe_mask, sp);
+    }
+}
+
+static int
+fast_scan_many_locals(basicblock *entryblock, int nlocals)
+{
+    assert(nlocals > 64);
+    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
+    if (states == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    Py_ssize_t blocknum = 0;
+    // state[i - 64] == blocknum if local i is guaranteed to
+    // be initialized, i.e., if it has had a previous LOAD_FAST or
+    // STORE_FAST within that basicblock (not followed by DELETE_FAST).
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        blocknum++;
+        for (int i = 0; i < b->b_iused; i++) {
+            struct instr *instr = &b->b_instr[i];
+            assert(instr->i_opcode != EXTENDED_ARG);
+            assert(instr->i_opcode != EXTENDED_ARG_QUICK);
+            assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+            int arg = instr->i_oparg;
+            if (arg < 64) {
+                continue;
+            }
+            assert(arg >= 0);
+            switch (instr->i_opcode) {
+                case DELETE_FAST:
+                    states[arg - 64] = blocknum - 1;
+                    break;
+                case STORE_FAST:
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST:
+                    if (states[arg - 64] != blocknum) {
+                        instr->i_opcode = LOAD_FAST_CHECK;
+                    }
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST_CHECK:
+                    Py_UNREACHABLE();
+            }
+        }
+    }
+    PyMem_Free(states);
+    return 0;
+}
 
 static int
 add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
                                                 struct compiler *c)
 {
+    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
+    if (nlocals == 0) {
+        return 0;
+    }
+    if (nlocals > 64) {
+        // To avoid O(nlocals**2) compilation, locals beyond the first
+        // 64 are only analyzed one basicblock at a time: initialization
+        // info is not passed between basicblocks.
+        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
+            return -1;
+        }
+        nlocals = 64;
+    }
     basicblock **stack = make_cfg_traversal_stack(entryblock);
     if (stack == NULL) {
         return -1;
     }
-    Py_ssize_t nparams = PyList_GET_SIZE(c->u->u_ste->ste_varnames);
-    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
-    for (int target = 0; target < nlocals; target++) {
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            b->b_visited = 0;
-        }
-        basicblock **stack_top = stack;
-
-        // First pass: find the relevant DFS starting points:
-        // the places where "being uninitialized" originates,
-        // which are the entry block and any DELETE_FAST statements.
-        if (target >= nparams) {
-            // only non-parameter locals start out uninitialized.
-            *(stack_top++) = entryblock;
-            entryblock->b_visited = 1;
-        }
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            scan_block_for_local(target, b, false, &stack_top);
-        }
-
-        // Second pass: Depth-first search to propagate uncertainty
-        while (stack_top > stack) {
-            basicblock *b = *--stack_top;
-            scan_block_for_local(target, b, true, &stack_top);
-        }
+    basicblock **sp = stack;
+
+    // First origin of being uninitialized:
+    // The non-parameter locals in the entry block.
+    int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames);
+    uint64_t start_mask = 0;
+    for (int i = nparams; i < nlocals; i++) {
+        start_mask |= (uint64_t)1 << i;
+    }
+    maybe_push(entryblock, start_mask, &sp);
+
+    // Second origin of being uninitialized:
+    // There could be DELETE_FAST somewhere, so
+    // be sure to scan each basicblock at least once.
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        scan_block_for_locals(b, &sp);
+    }
+
+    // Now propagate the uncertainty from the origins we found: Use
+    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
+    while (sp > stack) {
+        basicblock *b = *--sp;
+        // mark as no longer on stack
+        b->b_visited = 0;
+        scan_block_for_locals(b, &sp);
     }
     PyMem_Free(stack);
     return 0;
 }
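
As a rough mental model of what the rewritten C code above does for the first 64 locals, here is a short Python sketch of the same worklist dataflow over a toy control-flow graph. Everything in it (the Block class, scan, analyze) is invented for illustration and simplified; it ignores exception edges and superinstructions, so it is a sketch of the technique rather than CPython's implementation:

    class Block:
        def __init__(self, ops):
            self.ops = ops              # list of (opcode, local_index) pairs
            self.successors = []        # blocks reachable from this one
            self.unsafe_mask = 0        # bit i set => local i may be uninitialized on entry
            self.on_stack = False

    def scan(block, worklist):
        mask = block.unsafe_mask
        for i, (op, arg) in enumerate(block.ops):
            bit = 1 << arg
            if op == 'DELETE_FAST':
                mask |= bit
            elif op in ('STORE_FAST', 'LOAD_FAST_CHECK'):
                mask &= ~bit
            elif op == 'LOAD_FAST':
                if mask & bit:
                    block.ops[i] = ('LOAD_FAST_CHECK', arg)   # rewrite the unsafe load
                mask &= ~bit
        for succ in block.successors:
            both = succ.unsafe_mask | mask
            if both != succ.unsafe_mask:    # only push when there is new information
                succ.unsafe_mask = both
                if not succ.on_stack:
                    succ.on_stack = True
                    worklist.append(succ)

    def analyze(entry, blocks, n_params, n_locals):
        # Non-parameter locals start out possibly uninitialized in the entry block.
        entry.unsafe_mask = ((1 << n_locals) - 1) & ~((1 << n_params) - 1)
        worklist = list(blocks)             # scan every block at least once
        for b in blocks:
            b.on_stack = True
        while worklist:
            b = worklist.pop()
            b.on_stack = False
            scan(b, worklist)

Because a block's mask can only gain bits, each block is re-queued at most once per local that newly becomes unsafe there, so the whole pass is bounded by roughly 64 times the number of blocks instead of one full scan of the CFG per local.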