GH-95150: Use position and exception tables for code hashing and equality (GH-95509)

(cherry picked from commit c7e5bbaee8)

Co-authored-by: Brandt Bucher <brandtbucher@gmail.com>
This commit is contained in:
Miss Islington (bot) 2022-08-01 11:33:49 -07:00 committed by GitHub
parent 76d83b1dfe
commit 7baca3c05e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 5 deletions

View file

@ -428,6 +428,27 @@ class CodeTest(unittest.TestCase):
self.assertIsNone(line) self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1) self.assertEqual(end_line, new_code.co_firstlineno + 1)
def test_code_equality(self):
def f():
try:
a()
except:
b()
else:
c()
finally:
d()
code_a = f.__code__
code_b = code_a.replace(co_linetable=b"")
code_c = code_a.replace(co_exceptiontable=b"")
code_d = code_b.replace(co_exceptiontable=b"")
self.assertNotEqual(code_a, code_b)
self.assertNotEqual(code_a, code_c)
self.assertNotEqual(code_a, code_d)
self.assertNotEqual(code_b, code_c)
self.assertNotEqual(code_b, code_d)
self.assertNotEqual(code_c, code_d)
def isinterned(s): def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1]) return s is sys.intern(('_' + s + '_')[1:-1])

View file

@ -613,7 +613,7 @@ if 1:
exec(code, ns) exec(code, ns)
f1 = ns['f1'] f1 = ns['f1']
f2 = ns['f2'] f2 = ns['f2']
self.assertIs(f1.__code__, f2.__code__) self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, const) self.check_constant(f1, const)
self.assertEqual(repr(f1()), repr(const)) self.assertEqual(repr(f1()), repr(const))
@ -626,7 +626,7 @@ if 1:
# Note: "lambda: ..." emits "LOAD_CONST Ellipsis", # Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
# whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis" # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
f1, f2 = lambda: ..., lambda: ... f1, f2 = lambda: ..., lambda: ...
self.assertIs(f1.__code__, f2.__code__) self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, Ellipsis) self.check_constant(f1, Ellipsis)
self.assertEqual(repr(f1()), repr(Ellipsis)) self.assertEqual(repr(f1()), repr(Ellipsis))
@ -641,7 +641,7 @@ if 1:
# {0} is converted to a constant frozenset({0}) by the peephole # {0} is converted to a constant frozenset({0}) by the peephole
# optimizer # optimizer
f1, f2 = lambda x: x in {0}, lambda x: x in {0} f1, f2 = lambda x: x in {0}, lambda x: x in {0}
self.assertIs(f1.__code__, f2.__code__) self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
self.check_constant(f1, frozenset({0})) self.check_constant(f1, frozenset({0}))
self.assertTrue(f1(0)) self.assertTrue(f1(0))
@ -1264,6 +1264,27 @@ f(
self.assertIsNotNone(end_column) self.assertIsNotNone(end_column)
self.assertLessEqual((line, column), (end_line, end_column)) self.assertLessEqual((line, column), (end_line, end_column))
@support.cpython_only
def test_column_offset_deduplication(self):
# GH-95150: Code with different column offsets shouldn't be merged!
for source in [
"lambda: a",
"(a for b in c)",
"[a for b in c]",
"{a for b in c}",
"{a: b for c in d}",
]:
with self.subTest(source):
code = compile(f"{source}, {source}", "<test>", "eval")
self.assertEqual(len(code.co_consts), 2)
self.assertIsInstance(code.co_consts[0], types.CodeType)
self.assertIsInstance(code.co_consts[1], types.CodeType)
self.assertNotEqual(code.co_consts[0], code.co_consts[1])
self.assertNotEqual(
list(code.co_consts[0].co_positions()),
list(code.co_consts[1].co_positions()),
)
class TestExpressionStackSize(unittest.TestCase): class TestExpressionStackSize(unittest.TestCase):
# These tests check that the computed stack size for a code object # These tests check that the computed stack size for a code object

View file

@ -2012,7 +2012,8 @@ def fib(n):
a, b = 0, 1 a, b = 0, 1
""" """
try: try:
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec')) compile(s1, '<string>', 'exec')
compile(s2, '<string>', 'exec')
except SyntaxError: except SyntaxError:
self.fail("Indented statement over multiple lines is valid") self.fail("Indented statement over multiple lines is valid")

View file

@ -0,0 +1,3 @@
Update code object hashing and equality to consider all debugging and
exception handling tables. This fixes an issue where certain non-identical
code objects could be "deduplicated" during compilation.

View file

@ -1693,6 +1693,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
eq = PyObject_RichCompareBool(co->co_localsplusnames, eq = PyObject_RichCompareBool(co->co_localsplusnames,
cp->co_localsplusnames, Py_EQ); cp->co_localsplusnames, Py_EQ);
if (eq <= 0) goto unequal; if (eq <= 0) goto unequal;
eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
eq = PyObject_RichCompareBool(co->co_exceptiontable,
cp->co_exceptiontable, Py_EQ);
if (eq <= 0) {
goto unequal;
}
if (op == Py_EQ) if (op == Py_EQ)
res = Py_True; res = Py_True;
@ -1725,7 +1734,15 @@ code_hash(PyCodeObject *co)
if (h2 == -1) return -1; if (h2 == -1) return -1;
h3 = PyObject_Hash(co->co_localsplusnames); h3 = PyObject_Hash(co->co_localsplusnames);
if (h3 == -1) return -1; if (h3 == -1) return -1;
h = h0 ^ h1 ^ h2 ^ h3 ^ Py_hash_t h4 = PyObject_Hash(co->co_linetable);
if (h4 == -1) {
return -1;
}
Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
if (h5 == -1) {
return -1;
}
h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^ co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
co->co_flags; co->co_flags;
if (h == -1) h = -2; if (h == -1) h = -2;