gh-119105: difflib.py Differ.compare is too slow [for degenerate cases] (#119376)

Track all pairs achieving the best ratio in Differ(). This repairs the "very deep recursion and cubic time" bad cases in a way that preserves previous output.
This commit is contained in:
Tim Peters 2024-05-22 18:25:08 -05:00 committed by GitHub
parent e3bf5381fd
commit 07df93de73
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 89 additions and 59 deletions

View file

@@ -272,6 +272,26 @@ class TestSFpatches(unittest.TestCase):
self.assertIn('content="text/html; charset=us-ascii"', output)
self.assertIn('ımplıcıt', output)
class TestDiffer(unittest.TestCase):
    """Tests for how difflib.Differ pairs up closely matching lines."""

    def test_close_matches_aligned(self):
        # Of the 4 closely matching pairs, we want 1 to match with 3,
        # and 2 with 4, to align with a "top to bottom" mental model.
        a = ["cat\n", "dog\n", "close match 1\n", "close match 2\n"]
        b = ["close match 3\n", "close match 4\n", "kitten\n", "puppy\n"]
        m = difflib.Differ().compare(a, b)
        # The "? " guide lines are whitespace-sensitive: the caret must sit
        # directly under the one differing character (the trailing digit),
        # which is column 14 of the "- "/"+ " line it annotates.
        self.assertEqual(list(m),
                         ['- cat\n',
                          '- dog\n',
                          '- close match 1\n',
                          '?             ^\n',
                          '+ close match 3\n',
                          '?             ^\n',
                          '- close match 2\n',
                          '?             ^\n',
                          '+ close match 4\n',
                          '?             ^\n',
                          '+ kitten\n',
                          '+ puppy\n'])
class TestOutputFormat(unittest.TestCase):
def test_tab_delimiter(self):