GH-95150: Use position and exception tables for code hashing and equality (GH-95509)

(cherry picked from commit c7e5bbaee8)

Co-authored-by: Brandt Bucher <brandtbucher@gmail.com>
2025-07-23 19:25:40 +00:00 · 2022-08-01 11:33:49 -07:00 · 2022-08-01 11:33:49 -07:00 · 7baca3c05e
commit 7baca3c05e
parent 76d83b1dfe
5 changed files with 68 additions and 5 deletions
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -428,6 +428,27 @@ class CodeTest(unittest.TestCase):
            self.assertIsNone(line)
            self.assertEqual(end_line, new_code.co_firstlineno + 1)
    def test_code_equality(self):
        """Check that code objects differing only in their line table and/or
        exception table do not compare equal."""
        # f is never called here; only its code object is inspected.
        # NOTE(review): the try/except/else/finally body is presumably there so
        # the compiled code carries a non-empty exception table -- confirm.
        def f():
            try:
                a()
            except:
                b()
            else:
                c()
            finally:
                d()
        code_a = f.__code__
        # Variants of f's code object with the line table (code_b), the
        # exception table (code_c), or both (code_d) replaced by empty bytes.
        code_b = code_a.replace(co_linetable=b"")
        code_c = code_a.replace(co_exceptiontable=b"")
        code_d = code_b.replace(co_exceptiontable=b"")
        # Every pair among the four code objects must be unequal.
        self.assertNotEqual(code_a, code_b)
        self.assertNotEqual(code_a, code_c)
        self.assertNotEqual(code_a, code_d)
        self.assertNotEqual(code_b, code_c)
        self.assertNotEqual(code_b, code_d)
        self.assertNotEqual(code_c, code_d)
 def isinterned(s):
    """Return True if *s* is the very object that ``sys.intern`` returns for
    an equal string rebuilt from *s* by concatenation and slicing."""
    return s is sys.intern(('_' + s + '_')[1:-1])
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -613,7 +613,7 @@ if 1:
            exec(code, ns)
            f1 = ns['f1']
            f2 = ns['f2']
-            self.assertIs(f1.__code__, f2.__code__)
+            self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
            self.check_constant(f1, const)
            self.assertEqual(repr(f1()), repr(const))
@@ -626,7 +626,7 @@ if 1:
        # Note: "lambda: ..." emits "LOAD_CONST Ellipsis",
        # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis"
        f1, f2 = lambda: ..., lambda: ...
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
        self.check_constant(f1, Ellipsis)
        self.assertEqual(repr(f1()), repr(Ellipsis))
@@ -641,7 +641,7 @@ if 1:
        # {0} is converted to a constant frozenset({0}) by the peephole
        # optimizer
        f1, f2 = lambda x: x in {0}, lambda x: x in {0}
-        self.assertIs(f1.__code__, f2.__code__)
+        self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts)
        self.check_constant(f1, frozenset({0}))
        self.assertTrue(f1(0))
@@ -1264,6 +1264,27 @@ f(
            self.assertIsNotNone(end_column)
            self.assertLessEqual((line, column), (end_line, end_column))
    @support.cpython_only
    def test_column_offset_deduplication(self):
        """Check that the same source text compiled twice on one line yields
        two distinct, unequal nested code objects with different positions."""
        # GH-95150: Code with different column offsets shouldn't be merged!
        for source in [
            "lambda: a",
            "(a for b in c)",
            "[a for b in c]",
            "{a for b in c}",
            "{a: b for c in d}",
        ]:
            with self.subTest(source):
                # The same snippet appears twice in one expression, so the
                # second copy sits at a different column in the source.
                code = compile(f"{source}, {source}", "<test>", "eval")
                # Both nested code objects must be present as separate constants.
                self.assertEqual(len(code.co_consts), 2)
                self.assertIsInstance(code.co_consts[0], types.CodeType)
                self.assertIsInstance(code.co_consts[1], types.CodeType)
                self.assertNotEqual(code.co_consts[0], code.co_consts[1])
                # Their reported position information must differ as well.
                self.assertNotEqual(
                    list(code.co_consts[0].co_positions()),
                    list(code.co_consts[1].co_positions()),
                )
 class TestExpressionStackSize(unittest.TestCase):
    # These tests check that the computed stack size for a code object
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -2012,7 +2012,8 @@ def fib(n):
    a, b = 0, 1
 """
        try:
-            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
+            compile(s1, '<string>', 'exec')
            compile(s2, '<string>', 'exec')
        except SyntaxError:
            self.fail("Indented statement over multiple lines is valid")
--- a/Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst
+++ b/Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst
@@ -0,0 +1,3 @@
 Update code object hashing and equality to consider all debugging and
 exception handling tables. This fixes an issue where certain non-identical
 code objects could be "deduplicated" during compilation.
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -1693,6 +1693,15 @@ code_richcompare(PyObject *self, PyObject *other, int op)
    eq = PyObject_RichCompareBool(co->co_localsplusnames,
                                  cp->co_localsplusnames, Py_EQ);
    if (eq <= 0) goto unequal;
    eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ);
    if (eq <= 0) {
        goto unequal;
    }
    eq = PyObject_RichCompareBool(co->co_exceptiontable,
                                  cp->co_exceptiontable, Py_EQ);
    if (eq <= 0) {
        goto unequal;
    }
    if (op == Py_EQ)
        res = Py_True;
@@ -1725,7 +1734,15 @@ code_hash(PyCodeObject *co)
    if (h2 == -1) return -1;
    h3 = PyObject_Hash(co->co_localsplusnames);
    if (h3 == -1) return -1;
-    h = h0 ^ h1 ^ h2 ^ h3 ^
+    Py_hash_t h4 = PyObject_Hash(co->co_linetable);
    if (h4 == -1) {
        return -1;
    }
    Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable);
    if (h5 == -1) {
        return -1;
    }
    h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^
        co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^
        co->co_flags;
    if (h == -1) h = -2;