Skip to content

gh-99254: remove all unused consts from code objects #99255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.12a1 3509 (Conditional jumps only jump forward)
# Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack)
# Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction)
# Python 3.12a1 3512 (Remove all unused consts from code objects)

# Python 3.13 will start with 3550

Expand All @@ -437,7 +438,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3511).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n'

_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

Expand Down
38 changes: 36 additions & 2 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ def test_merge_code_attrs(self):
self.assertIs(f1.__code__.co_linetable, f2.__code__.co_linetable)

@support.cpython_only
def test_strip_unused_consts(self):
def test_remove_unused_consts(self):
def f():
"docstring"
if True:
Expand All @@ -679,7 +679,41 @@ def f():
return "unused"

self.assertEqual(f.__code__.co_consts,
("docstring", True, "used"))
("docstring", "used"))

@support.cpython_only
def test_remove_unused_consts_no_docstring(self):
    # Slot 0 of co_consts is always kept, even when the function has
    # no docstring (it then holds None), so only the dead "unused"
    # constant should disappear.
    def f():
        if True:
            return "used"
        else:
            return "unused"

    remaining_consts = f.__code__.co_consts
    self.assertEqual(remaining_consts, (None, "used"))

@support.cpython_only
def test_remove_unused_consts_extended_args(self):
    # Build a function whose body holds N reachable string constants
    # ('t0'..'t{N-1}') interleaved with N unreachable ones
    # ('f0'..'f{N-1}'), then check that only the reachable ones remain
    # in co_consts and that the function still computes the right value.
    # NOTE(review): N is presumably chosen so that constant indices
    # exceed a one-byte oparg (hence "extended_args") -- confirm.
    N = 1000
    lines = [
        "def f():\n",
        "\ts = ''\n",
        "\tfor i in range(1):\n",
    ]
    for i in range(N):
        lines.append(f"\t\tif True: s += 't{i}'\n")
        lines.append(f"\t\tif False: s += 'f{i}'\n")
    lines.append("\treturn s\n")
    code = "".join(lines)

    g = {}
    # A single exec is enough to define f in g; running the same source
    # a second time would only rebind g['f'] to an equivalent function.
    exec(code, g)
    f = g['f']

    # Slot 0 is the (absent) docstring, followed by '' and 1 from the
    # function prologue, then every reachable 't{i}' in source order.
    expected = tuple([None, '', 1] + [f't{i}' for i in range(N)])
    self.assertEqual(f.__code__.co_consts, expected)

    # After remapping, each load must still fetch the right string.
    expected = "".join(expected[3:])
    self.assertEqual(expected, f())

# Stripping unused constants is not a strict requirement for the
# Python semantics; it's more of an implementation detail.
Expand Down
22 changes: 11 additions & 11 deletions Lib/test/test_dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,13 @@ def bug1333982(x=[]):
%3d RESUME 0

%3d LOAD_ASSERTION_ERROR
LOAD_CONST 2 (<code object <listcomp> at 0x..., file "%s", line %d>)
LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>)
MAKE_FUNCTION 0
LOAD_FAST 0 (x)
GET_ITER
CALL 0

%3d LOAD_CONST 3 (1)
%3d LOAD_CONST 2 (1)

%3d BINARY_OP 0 (+)
CALL 0
Expand Down Expand Up @@ -1446,9 +1446,9 @@ def jumpy():
# End fodder for opinfo generation tests
expected_outer_line = 1
_line_offset = outer.__code__.co_firstlineno - 1
code_object_f = outer.__code__.co_consts[3]
code_object_f = outer.__code__.co_consts[1]
expected_f_line = code_object_f.co_firstlineno - _line_offset
code_object_inner = code_object_f.co_consts[3]
code_object_inner = code_object_f.co_consts[1]
expected_inner_line = code_object_inner.co_firstlineno - _line_offset
expected_jumpy_line = 1

Expand Down Expand Up @@ -1485,21 +1485,21 @@ def _prepare_test_cases():
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=4, starts_line=1, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=7, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=8, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=12, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=20, starts_line=7, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=32, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=34, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=40, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_MAP', opcode=105, arg=0, argval=0, argrepr='', offset=42, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='CALL', opcode=171, arg=7, argval=7, argrepr='', offset=46, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=56, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='f', argrepr='f', offset=58, starts_line=8, is_jump_target=False, positions=None),
Expand All @@ -1511,13 +1511,13 @@ def _prepare_test_cases():
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=4, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=12, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=18, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=22, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=24, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=26, starts_line=5, is_jump_target=False, positions=None),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The compiler now removes all unused constants from code objects (except the first one, which may be a docstring).
111 changes: 92 additions & 19 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -8472,7 +8472,7 @@ static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);

static int
trim_unused_consts(basicblock *entryblock, PyObject *consts);
remove_unused_consts(basicblock *entryblock, PyObject *consts);

/* Duplicates exit BBs, so that line numbers can be propagated to them */
static int
Expand Down Expand Up @@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone)
if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) {
goto error;
}
if (remove_unused_consts(g->g_entryblock, consts)) {
goto error;
}

/** line numbers (TODO: move this before optimization stage) */
if (duplicate_exits_without_lineno(g) < 0) {
Expand Down Expand Up @@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone)
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(g->g_entryblock);

if (trim_unused_consts(g->g_entryblock, consts)) {
goto error;
}

/* Create assembler */
if (!assemble_init(&a, c->u->u_firstlineno))
goto error;
Expand Down Expand Up @@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
return 0;
}

// Remove trailing unused constants.

static int
trim_unused_consts(basicblock *entryblock, PyObject *consts)
remove_unused_consts(basicblock *entryblock, PyObject *consts)
{
assert(PyList_CheckExact(consts));
Py_ssize_t nconsts = PyList_GET_SIZE(consts);
if (nconsts == 0) {
return 0; /* nothing to do */
}

Py_ssize_t *index_map = NULL;
Py_ssize_t *reverse_index_map = NULL;
int err = 1;

index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 1; i < nconsts; i++) {
index_map[i] = -1;
}
// The first constant may be docstring; keep it always.
int max_const_index = 0;
index_map[0] = 0;

/* mark used consts */
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if ((b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) &&
b->b_instr[i].i_oparg > max_const_index) {
max_const_index = b->b_instr[i].i_oparg;
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {

int index = b->b_instr[i].i_oparg;
index_map[index] = index;
}
}
}
if (max_const_index+1 < PyList_GET_SIZE(consts)) {
//fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
// max_const_index, (int)PyList_GET_SIZE(consts));
if (PyList_SetSlice(consts, max_const_index+1,
PyList_GET_SIZE(consts), NULL) < 0) {
return 1;
/* now index_map[i] == i if consts[i] is used, -1 otherwise */

/* condense consts */
Py_ssize_t n_used_consts = 0;
for (int i = 0; i < nconsts; i++) {
if (index_map[i] != -1) {
assert(index_map[i] == i);
index_map[n_used_consts++] = index_map[i];
}
}
return 0;
if (n_used_consts == nconsts) {
/* nothing to do */
err = 0;
goto end;
}

/* move all used consts to the beginning of the consts list */
assert(n_used_consts < nconsts);
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
Py_ssize_t old_index = index_map[i];
assert(i <= old_index && old_index < nconsts);
if (i != old_index) {
PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
assert(value != NULL);
PyList_SetItem(consts, i, Py_NewRef(value));
}
}

/* truncate the consts list at its new size */
if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
goto end;
}

/* adjust const indices in the bytecode */
reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
if (reverse_index_map == NULL) {
goto end;
}
for (Py_ssize_t i = 0; i < nconsts; i++) {
reverse_index_map[i] = -1;
}
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
assert(index_map[i] != -1);
assert(reverse_index_map[index_map[i]] == -1);
reverse_index_map[index_map[i]] = i;
}

for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
if (b->b_instr[i].i_opcode == LOAD_CONST ||
b->b_instr[i].i_opcode == KW_NAMES) {

int index = b->b_instr[i].i_oparg;
assert(reverse_index_map[index] >= 0);
assert(reverse_index_map[index] < n_used_consts);
b->b_instr[i].i_oparg = (int)reverse_index_map[index];
}
}
}

err = 0;
end:
PyMem_Free(index_map);
PyMem_Free(reverse_index_map);
return err;
}

static inline int
Expand Down