mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
gh-130167: Optimise `textwrap.dedent()` (#131919)
Co-authored-by: Marius Juston <marius.juston@hotmail.fr> Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
685fd74f81
commit
6aa88a2cb3
3 changed files with 67 additions and 36 deletions
|
@ -769,6 +769,56 @@ class DedentTestCase(unittest.TestCase):
|
|||
"""assert that dedent() has no effect on 'text'"""
|
||||
self.assertEqual(text, dedent(text))
|
||||
|
||||
def test_dedent_whitespace(self):
|
||||
# The empty string.
|
||||
text = ""
|
||||
self.assertUnchanged(text)
|
||||
|
||||
# Only spaces.
|
||||
text = " "
|
||||
expect = ""
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# Only tabs.
|
||||
text = "\t\t\t\t"
|
||||
expect = ""
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# A mixture.
|
||||
text = " \t \t\t \t "
|
||||
expect = ""
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# ASCII whitespace.
|
||||
text = "\f\n\r\t\v "
|
||||
expect = "\n"
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# One newline.
|
||||
text = "\n"
|
||||
expect = "\n"
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# Windows-style newlines.
|
||||
text = "\r\n" * 5
|
||||
expect = "\n" * 5
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# Whitespace mixture.
|
||||
text = " \n\t\n \n\t\t\n\n\n "
|
||||
expect = "\n\n\n\n\n\n"
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# Lines consisting only of whitespace are always normalised
|
||||
text = "a\n \n\t\n"
|
||||
expect = "a\n\n\n"
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
# Whitespace characters on non-empty lines are retained
|
||||
text = "a\r\n\r\n\r\n"
|
||||
expect = "a\r\n\n\n"
|
||||
self.assertEqual(expect, dedent(text))
|
||||
|
||||
def test_dedent_nomargin(self):
|
||||
# No lines indented.
|
||||
text = "Hello there.\nHow are you?\nOh good, I'm glad."
|
||||
|
|
|
@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):
|
|||
|
||||
# -- Loosely related functionality -------------------------------------
|
||||
|
||||
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
|
||||
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
|
||||
|
||||
def dedent(text):
|
||||
"""Remove any common leading whitespace from every line in `text`.
|
||||
|
||||
|
@ -429,42 +426,21 @@ def dedent(text):
|
|||
|
||||
Entirely blank lines are normalized to a newline character.
|
||||
"""
|
||||
# Look for the longest leading string of spaces and tabs common to
|
||||
# all lines.
|
||||
margin = None
|
||||
text = _whitespace_only_re.sub('', text)
|
||||
indents = _leading_whitespace_re.findall(text)
|
||||
for indent in indents:
|
||||
if margin is None:
|
||||
margin = indent
|
||||
if not text:
|
||||
return text
|
||||
|
||||
# Current line more deeply indented than previous winner:
|
||||
# no change (previous winner is still on top).
|
||||
elif indent.startswith(margin):
|
||||
pass
|
||||
lines = text.split('\n')
|
||||
|
||||
# Current line consistent with and no deeper than previous winner:
|
||||
# it's the new winner.
|
||||
elif margin.startswith(indent):
|
||||
margin = indent
|
||||
# Get length of leading whitespace, inspired by ``os.path.commonprefix()``.
|
||||
non_blank_lines = [l for l in lines if l and not l.isspace()]
|
||||
l1 = min(non_blank_lines, default='')
|
||||
l2 = max(non_blank_lines, default='')
|
||||
margin = 0
|
||||
for margin, c in enumerate(l1):
|
||||
if c != l2[margin] or c not in ' \t':
|
||||
break
|
||||
|
||||
# Find the largest common whitespace between current line and previous
|
||||
# winner.
|
||||
else:
|
||||
for i, (x, y) in enumerate(zip(margin, indent)):
|
||||
if x != y:
|
||||
margin = margin[:i]
|
||||
break
|
||||
|
||||
# sanity check (testing/debugging only)
|
||||
if 0 and margin:
|
||||
for line in text.split("\n"):
|
||||
assert not line or line.startswith(margin), \
|
||||
"line = %r, margin = %r" % (line, margin)
|
||||
|
||||
if margin:
|
||||
text = re.sub(r'(?m)^' + margin, '', text)
|
||||
return text
|
||||
return '\n'.join([l[margin:] if not l.isspace() else '' for l in lines])
|
||||
|
||||
|
||||
def indent(text, prefix, predicate=None):
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
Improved performance of :func:`textwrap.dedent` by an average of ~2.4x
|
||||
(with improvements of up to 4x for large inputs),
|
||||
and fixed a bug where blank lines with whitespace characters other than space
|
||||
or horizontal tab were not normalised to a newline character.
|
||||
Patch by Adam Turner, Marius Juston, and Pieter Eendebak.
|
Loading…
Add table
Add a link
Reference in a new issue