#2659: add `break_on_hyphens` to TextWrapper.

2025-09-17 22:20:23 +00:00 · 2008-05-11 10:42:28 +00:00 · 2008-05-11 10:42:28 +00:00 · 6f95ae55b1
commit 6f95ae55b1
parent 5b54887deb
4 changed files with 48 additions and 3 deletions
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@ -41,6 +41,10 @@ instance and calling a single method on it.  That instance is not reused, so for
 applications that wrap/fill many text strings, it will be more efficient for you
 to create your own :class:`TextWrapper` object.
 Text is preferably wrapped on whitespaces and right after the hyphens in
 hyphenated words; only then will long words be broken if necessary, unless
 :attr:`TextWrapper.break_long_words` is set to false.
 An additional utility function, :func:`dedent`, is provided to remove
 indentation from strings that have unwanted whitespace to the left of the text.
@ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text.
      than :attr:`width`.  (Long words will be put on a line by themselves, in
      order to minimize the amount by which :attr:`width` is exceeded.)
   .. attribute:: break_on_hyphens
      (default: ``True``) If true, wrapping will occur preferably on whitespaces
      and right after hyphens in compound words, as is customary in English.
      If false, only whitespaces will be considered as potentially good places
      for line breaks, but you need to set :attr:`break_long_words` to false if
      you want truly unbreakable (insecable) words.  The default behaviour in
      previous versions was to always allow breaking hyphenated words.
      .. versionadded:: 2.6
   :class:`TextWrapper` also provides two public methods, analogous to the
   module-level convenience functions:
   .. method:: wrap(text)
      Wraps the single paragraph in *text* (a string) so every line is at most
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@ -364,6 +364,14 @@ What a mess!
             ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
              "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
    def test_break_on_hyphens(self):
        # Ensure that the break_on_hyphens attribute works: it toggles
        # whether a line may end right after the hyphen of a compound word.
        sample = "yaba daba-doo"
        expectations = (
            (True, ["yaba daba-", "doo"]),     # break allowed after "daba-"
            (False, ["yaba", "daba-doo"]),     # only whitespace is a break
        )
        for hyphen_breaks, wrapped in expectations:
            self.check_wrap(sample, 10, wrapped,
                            break_on_hyphens=hyphen_breaks)
    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@ -63,6 +63,10 @@ class TextWrapper:
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words. If true, wrapping will occur
        preferably on whitespaces and right after hyphens that are part
        of compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """
@ -85,6 +89,12 @@ class TextWrapper:
        r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
    # This less funky little regex just splits on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(\s+)')
    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
@ -102,7 +112,8 @@ class TextWrapper:
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
-                 drop_whitespace=True):
+                 drop_whitespace=True,
                 break_on_hyphens=True):
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
@ -111,6 +122,7 @@ class TextWrapper:
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
    # -- Private methods -----------------------------------------------
@ -143,8 +155,15 @@ class TextWrapper:
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
-        chunks = self.wordsep_re.split(text)
+        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        chunks = filter(None, chunks)  # remove empty chunks
        return chunks
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -23,6 +23,8 @@ Extension Modules
 Library
 -------
 - #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
 - The mhlib module has been deprecated for removal in Python 3.0.
 - The linuxaudiodev module has been deprecated for removal in Python 3.0.