gh-99254: remove all unused consts from code objects (GH-99255)

This commit is contained in:
Irit Katriel 2022-11-11 10:53:43 +00:00 committed by GitHub
parent faf7dfa656
commit 3dd6ee2c00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 142 additions and 33 deletions

View file

@ -8472,7 +8472,7 @@ static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);
static int
trim_unused_consts(basicblock *entryblock, PyObject *consts);
remove_unused_consts(basicblock *entryblock, PyObject *consts);
/* Duplicates exit BBs, so that line numbers can be propagated to them */
static int
@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone)
if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) {
goto error;
}
if (remove_unused_consts(g->g_entryblock, consts)) {
goto error;
}
/** line numbers (TODO: move this before optimization stage) */
if (duplicate_exits_without_lineno(g) < 0) {
@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone)
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(g->g_entryblock);
if (trim_unused_consts(g->g_entryblock, consts)) {
goto error;
}
/* Create assembler */
if (!assemble_init(&a, c->u->u_firstlineno))
goto error;
@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
return 0;
}
// Remove trailing unused constants.
static int
trim_unused_consts(basicblock *entryblock, PyObject *consts)
remove_unused_consts(basicblock *entryblock, PyObject *consts)
{
assert(PyList_CheckExact(consts));
Py_ssize_t nconsts = PyList_GET_SIZE(consts);
if (nconsts == 0) {
return 0; /* nothing to do */
}
Py_ssize_t *index_map = NULL;
Py_ssize_t *reverse_index_map = NULL;
int err = 1;
index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 1; i < nconsts; i++) {
index_map[i] = -1;
}
// The first constant may be docstring; keep it always.
int max_const_index = 0;
index_map[0] = 0;
/* mark used consts */
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if ((b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) &&
b->b_instr[i].i_oparg > max_const_index) {
max_const_index = b->b_instr[i].i_oparg;
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {
int index = b->b_instr[i].i_oparg;
index_map[index] = index;
}
}
}
if (max_const_index+1 < PyList_GET_SIZE(consts)) {
//fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
// max_const_index, (int)PyList_GET_SIZE(consts));
if (PyList_SetSlice(consts, max_const_index+1,
PyList_GET_SIZE(consts), NULL) < 0) {
return 1;
/* now index_map[i] == i if consts[i] is used, -1 otherwise */
/* condense consts */
Py_ssize_t n_used_consts = 0;
for (int i = 0; i < nconsts; i++) {
if (index_map[i] != -1) {
assert(index_map[i] == i);
index_map[n_used_consts++] = index_map[i];
}
}
return 0;
if (n_used_consts == nconsts) {
/* nothing to do */
err = 0;
goto end;
}
/* move all used consts to the beginning of the consts list */
assert(n_used_consts < nconsts);
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
Py_ssize_t old_index = index_map[i];
assert(i <= old_index && old_index < nconsts);
if (i != old_index) {
PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
assert(value != NULL);
PyList_SetItem(consts, i, Py_NewRef(value));
}
}
/* truncate the consts list at its new size */
if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
goto end;
}
/* adjust const indices in the bytecode */
reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (reverse_index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 0; i < nconsts; i++) {
reverse_index_map[i] = -1;
}
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
assert(index_map[i] != -1);
assert(reverse_index_map[index_map[i]] == -1);
reverse_index_map[index_map[i]] = i;
}
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {
int index = b->b_instr[i].i_oparg;
assert(reverse_index_map[index] >= 0);
assert(reverse_index_map[index] < n_used_consts);
b->b_instr[i].i_oparg = (int)reverse_index_map[index];
}
}
}
err = 0;
end:
PyMem_Free(index_map);
PyMem_Free(reverse_index_map);
return err;
}
static inline int