gh-130167: Optimise `textwrap.dedent()` (#131919)

Co-authored-by: Marius Juston <marius.juston@hotmail.fr> Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
2025-09-27 18:59:43 +00:00 · 2025-03-31 01:35:12 +01:00 · 2025-03-31 01:35:12 +01:00 · 6aa88a2cb3
commit 6aa88a2cb3
parent 685fd74f81
3 changed files with 67 additions and 36 deletions
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@ -769,6 +769,56 @@ class DedentTestCase(unittest.TestCase):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))
    def test_dedent_whitespace(self):
        # dedent() on the empty string, on whitespace-only input, and on
        # text whose blank lines contain whitespace characters.

        # The empty string.
        self.assertUnchanged("")

        # Each entry is (input text, expected dedent() result); entries are
        # checked in order and the first mismatch fails the test.
        cases = (
            # Only spaces.
            ("    ", ""),
            # Only tabs.
            ("\t\t\t\t", ""),
            # A mixture.
            (" \t  \t\t  \t ", ""),
            # ASCII whitespace.
            ("\f\n\r\t\v ", "\n"),
            # One newline.
            ("\n", "\n"),
            # Windows-style newlines.
            ("\r\n" * 5, "\n" * 5),
            # Whitespace mixture.
            ("    \n\t\n  \n\t\t\n\n\n       ", "\n\n\n\n\n\n"),
            # Lines consisting only of whitespace are always normalised
            ("a\n \n\t\n", "a\n\n\n"),
            # Whitespace characters on non-empty lines are retained
            ("a\r\n\r\n\r\n", "a\r\n\n\n"),
        )
        for source, expected in cases:
            self.assertEqual(expected, dedent(source))
    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):
 # -- Loosely related functionality -------------------------------------
 _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
 _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
 def dedent(text):
    """Remove any common leading whitespace from every line in `text`.
@ -429,42 +426,21 @@ def dedent(text):
    Entirely blank lines are normalized to a newline character.
    """
-    # Look for the longest leading string of spaces and tabs common to
+    if not text:
-    # all lines.
+        return text
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent
-        # Current line more deeply indented than previous winner:
+    lines = text.split('\n')
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass
-        # Current line consistent with and no deeper than previous winner:
+    # Get length of leading whitespace, inspired by ``os.path.commonprefix()``.
-        # it's the new winner.
+    non_blank_lines = [l for l in lines if l and not l.isspace()]
-        elif margin.startswith(indent):
+    l1 = min(non_blank_lines, default='')
-            margin = indent
+    l2 = max(non_blank_lines, default='')
    margin = 0
    for margin, c in enumerate(l1):
        if c != l2[margin] or c not in ' \t':
            break
-        # Find the largest common whitespace between current line and previous
+    return '\n'.join([l[margin:] if not l.isspace() else '' for l in lines])
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break
    # sanity check (testing/debugging only)
    if 0 and margin:
        for line in text.split("\n"):
            assert not line or line.startswith(margin), \
                   "line = %r, margin = %r" % (line, margin)
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
 def indent(text, prefix, predicate=None):
--- a/Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst
+++ b/Misc/NEWS.d/next/Library/2025-03-30-19-55-10.gh-issue-131792.NNjzFA.rst
@ -0,0 +1,5 @@
 Improved performance of :func:`textwrap.dedent` by an average of ~2.4x
 (with improvements of up to 4x for large inputs),
 and fixed a bug where blank lines containing whitespace characters other
 than space or horizontal tab were not normalised to a newline character.
 Patch by Adam Turner, Marius Juston, and Pieter Eendebak.