mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
gh-130167: Optimise `textwrap.dedent()` (#131919)
Co-authored-by: Marius Juston <marius.juston@hotmail.fr>
Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
685fd74f81
commit
6aa88a2cb3
3 changed files with 67 additions and 36 deletions
|
@ -769,6 +769,56 @@ class DedentTestCase(unittest.TestCase):
|
||||||
"""assert that dedent() has no effect on 'text'"""
|
"""assert that dedent() has no effect on 'text'"""
|
||||||
self.assertEqual(text, dedent(text))
|
self.assertEqual(text, dedent(text))
|
||||||
|
|
||||||
|
def test_dedent_whitespace(self):
|
||||||
|
# The empty string.
|
||||||
|
text = ""
|
||||||
|
self.assertUnchanged(text)
|
||||||
|
|
||||||
|
# Only spaces.
|
||||||
|
text = " "
|
||||||
|
expect = ""
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# Only tabs.
|
||||||
|
text = "\t\t\t\t"
|
||||||
|
expect = ""
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# A mixture.
|
||||||
|
text = " \t \t\t \t "
|
||||||
|
expect = ""
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# ASCII whitespace.
|
||||||
|
text = "\f\n\r\t\v "
|
||||||
|
expect = "\n"
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# One newline.
|
||||||
|
text = "\n"
|
||||||
|
expect = "\n"
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# Windows-style newlines.
|
||||||
|
text = "\r\n" * 5
|
||||||
|
expect = "\n" * 5
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# Whitespace mixture.
|
||||||
|
text = " \n\t\n \n\t\t\n\n\n "
|
||||||
|
expect = "\n\n\n\n\n\n"
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# Lines consisting only of whitespace are always normalised
|
||||||
|
text = "a\n \n\t\n"
|
||||||
|
expect = "a\n\n\n"
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
|
# Whitespace characters on non-empty lines are retained
|
||||||
|
text = "a\r\n\r\n\r\n"
|
||||||
|
expect = "a\r\n\n\n"
|
||||||
|
self.assertEqual(expect, dedent(text))
|
||||||
|
|
||||||
def test_dedent_nomargin(self):
|
def test_dedent_nomargin(self):
|
||||||
# No lines indented.
|
# No lines indented.
|
||||||
text = "Hello there.\nHow are you?\nOh good, I'm glad."
|
text = "Hello there.\nHow are you?\nOh good, I'm glad."
|
||||||
|
|
|
@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):
|
||||||
|
|
||||||
# -- Loosely related functionality -------------------------------------
|
# -- Loosely related functionality -------------------------------------
|
||||||
|
|
||||||
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
|
|
||||||
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
|
|
||||||
|
|
||||||
def dedent(text):
|
def dedent(text):
|
||||||
"""Remove any common leading whitespace from every line in `text`.
|
"""Remove any common leading whitespace from every line in `text`.
|
||||||
|
|
||||||
|
@ -429,42 +426,21 @@ def dedent(text):
|
||||||
|
|
||||||
Entirely blank lines are normalized to a newline character.
|
Entirely blank lines are normalized to a newline character.
|
||||||
"""
|
"""
|
||||||
# Look for the longest leading string of spaces and tabs common to
|
if not text:
|
||||||
# all lines.
|
return text
|
||||||
margin = None
|
|
||||||
text = _whitespace_only_re.sub('', text)
|
|
||||||
indents = _leading_whitespace_re.findall(text)
|
|
||||||
for indent in indents:
|
|
||||||
if margin is None:
|
|
||||||
margin = indent
|
|
||||||
|
|
||||||
# Current line more deeply indented than previous winner:
|
lines = text.split('\n')
|
||||||
# no change (previous winner is still on top).
|
|
||||||
elif indent.startswith(margin):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Current line consistent with and no deeper than previous winner:
|
# Get length of leading whitespace, inspired by ``os.path.commonprefix()``.
|
||||||
# it's the new winner.
|
non_blank_lines = [l for l in lines if l and not l.isspace()]
|
||||||
elif margin.startswith(indent):
|
l1 = min(non_blank_lines, default='')
|
||||||
margin = indent
|
l2 = max(non_blank_lines, default='')
|
||||||
|
margin = 0
|
||||||
|
for margin, c in enumerate(l1):
|
||||||
|
if c != l2[margin] or c not in ' \t':
|
||||||
|
break
|
||||||
|
|
||||||
# Find the largest common whitespace between current line and previous
|
return '\n'.join([l[margin:] if not l.isspace() else '' for l in lines])
|
||||||
# winner.
|
|
||||||
else:
|
|
||||||
for i, (x, y) in enumerate(zip(margin, indent)):
|
|
||||||
if x != y:
|
|
||||||
margin = margin[:i]
|
|
||||||
break
|
|
||||||
|
|
||||||
# sanity check (testing/debugging only)
|
|
||||||
if 0 and margin:
|
|
||||||
for line in text.split("\n"):
|
|
||||||
assert not line or line.startswith(margin), \
|
|
||||||
"line = %r, margin = %r" % (line, margin)
|
|
||||||
|
|
||||||
if margin:
|
|
||||||
text = re.sub(r'(?m)^' + margin, '', text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def indent(text, prefix, predicate=None):
|
def indent(text, prefix, predicate=None):
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
Improved performance of :func:`textwrap.dedent` by an average of ~2.4x
|
||||||
|
(with improvements of up to 4x for large inputs),
|
||||||
|
and fixed a bug where blank lines with whitespace characters other than space
|
||||||
|
or horizontal tab were not normalised to a newline character.
|
||||||
|
Patch by Adam Turner, Marius Juston, and Pieter Eendebak.
|
Loading…
Add table
Add a link
Reference in a new issue