gh-119105: difflib.py Differ.compare is too slow [for degenerate cases] (#119376)

Track all pairs achieving the best ratio in Differ(). This repairs the "very deep recursion and cubic time" bad cases in a way that preserves previous output.
2025-11-02 03:01:58 +00:00 · 2024-05-22 18:25:08 -05:00 · 2024-05-22 18:25:08 -05:00 · 07df93de73
commit 07df93de73
parent e3bf5381fd
2 changed files with 89 additions and 59 deletions
--- a/Lib/test/test_difflib.py
+++ b/Lib/test/test_difflib.py
@@ -272,6 +272,26 @@ class TestSFpatches(unittest.TestCase):
        self.assertIn('content="text/html; charset=us-ascii"', output)
        self.assertIn('&#305;mpl&#305;c&#305;t', output)

+class TestDiffer(unittest.TestCase):
+    def test_close_matches_aligned(self):
+        # Of the 4 closely matching pairs, we want 1 to match with 3,
+        # and 2 with 4, to align with a "top to bottom" mental model.
+        a = ["cat\n", "dog\n", "close match 1\n", "close match 2\n"]
+        b = ["close match 3\n", "close match 4\n", "kitten\n", "puppy\n"]
+        m = difflib.Differ().compare(a, b)
+        self.assertEqual(list(m),
+                           ['- cat\n',
+                            '- dog\n',
+                            '- close match 1\n',
+                            '?             ^\n',
+                            '+ close match 3\n',
+                            '?             ^\n',
+                            '- close match 2\n',
+                            '?             ^\n',
+                            '+ close match 4\n',
+                            '?             ^\n',
+                            '+ kitten\n',
+                            '+ puppy\n'])

 class TestOutputFormat(unittest.TestCase):
    def test_tab_delimiter(self):