mirror of
https://github.com/python/cpython.git
synced 2025-07-30 14:44:10 +00:00
#2659: add `break_on_hyphens` to TextWrapper.
This commit is contained in:
parent
5b54887deb
commit
6f95ae55b1
4 changed files with 48 additions and 3 deletions
|
@ -41,6 +41,10 @@ instance and calling a single method on it. That instance is not reused, so for
|
||||||
applications that wrap/fill many text strings, it will be more efficient for you
|
applications that wrap/fill many text strings, it will be more efficient for you
|
||||||
to create your own :class:`TextWrapper` object.
|
to create your own :class:`TextWrapper` object.
|
||||||
|
|
||||||
|
Text is preferably wrapped on whitespaces and right after the hyphens in
|
||||||
|
hyphenated words; only then will long words be broken if necessary, unless
|
||||||
|
:attr:`TextWrapper.break_long_words` is set to false.
|
||||||
|
|
||||||
An additional utility function, :func:`dedent`, is provided to remove
|
An additional utility function, :func:`dedent`, is provided to remove
|
||||||
indentation from strings that have unwanted whitespace to the left of the text.
|
indentation from strings that have unwanted whitespace to the left of the text.
|
||||||
|
|
||||||
|
@ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text.
|
||||||
than :attr:`width`. (Long words will be put on a line by themselves, in
|
than :attr:`width`. (Long words will be put on a line by themselves, in
|
||||||
order to minimize the amount by which :attr:`width` is exceeded.)
|
order to minimize the amount by which :attr:`width` is exceeded.)
|
||||||
|
|
||||||
|
|
||||||
|
.. attribute:: break_on_hyphens
|
||||||
|
|
||||||
|
(default: ``True``) If true, wrapping will occur preferably on whitespaces
|
||||||
|
and right after hyphens in compound words, as is customary in English.
|
||||||
|
If false, only whitespaces will be considered as potentially good places
|
||||||
|
for line breaks, but you need to set :attr:`break_long_words` to false if
|
||||||
|
you want truly unbreakable words. Default behaviour in previous versions
|
||||||
|
was to always allow breaking hyphenated words.
|
||||||
|
|
||||||
|
.. versionadded:: 2.6
|
||||||
|
|
||||||
|
|
||||||
:class:`TextWrapper` also provides two public methods, analogous to the
|
:class:`TextWrapper` also provides two public methods, analogous to the
|
||||||
module-level convenience functions:
|
module-level convenience functions:
|
||||||
|
|
||||||
|
|
||||||
.. method:: wrap(text)
|
.. method:: wrap(text)
|
||||||
|
|
||||||
Wraps the single paragraph in *text* (a string) so every line is at most
|
Wraps the single paragraph in *text* (a string) so every line is at most
|
||||||
|
|
|
@ -364,6 +364,14 @@ What a mess!
|
||||||
["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
|
["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
|
||||||
"ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])
|
"ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])
|
||||||
|
|
||||||
|
def test_break_on_hyphens(self):
|
||||||
|
# Ensure that the break_on_hyphens attribute works
|
||||||
|
text = "yaba daba-doo"
|
||||||
|
self.check_wrap(text, 10, ["yaba daba-", "doo"],
|
||||||
|
break_on_hyphens=True)
|
||||||
|
self.check_wrap(text, 10, ["yaba", "daba-doo"],
|
||||||
|
break_on_hyphens=False)
|
||||||
|
|
||||||
def test_bad_width(self):
|
def test_bad_width(self):
|
||||||
# Ensure that width <= 0 is caught.
|
# Ensure that width <= 0 is caught.
|
||||||
text = "Whatever, it doesn't matter."
|
text = "Whatever, it doesn't matter."
|
||||||
|
|
|
@ -63,6 +63,10 @@ class TextWrapper:
|
||||||
break_long_words (default: true)
|
break_long_words (default: true)
|
||||||
Break words longer than 'width'. If false, those words will not
|
Break words longer than 'width'. If false, those words will not
|
||||||
be broken, and some lines might be longer than 'width'.
|
be broken, and some lines might be longer than 'width'.
|
||||||
|
break_on_hyphens (default: true)
|
||||||
|
Allow breaking hyphenated words. If true, wrapping will occur
|
||||||
|
preferably on whitespaces and right after hyphens that are part of
|
||||||
|
compound words.
|
||||||
drop_whitespace (default: true)
|
drop_whitespace (default: true)
|
||||||
Drop leading and trailing whitespace from lines.
|
Drop leading and trailing whitespace from lines.
|
||||||
"""
|
"""
|
||||||
|
@ -85,6 +89,12 @@ class TextWrapper:
|
||||||
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
|
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
|
||||||
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
||||||
|
|
||||||
|
# This less funky little regex just splits on recognized spaces. E.g.
|
||||||
|
# "Hello there -- you goof-ball, use the -b option!"
|
||||||
|
# splits into
|
||||||
|
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
|
||||||
|
wordsep_simple_re = re.compile(r'(\s+)')
|
||||||
|
|
||||||
# XXX this is not locale- or charset-aware -- string.lowercase
|
# XXX this is not locale- or charset-aware -- string.lowercase
|
||||||
# is US-ASCII only (and therefore English-only)
|
# is US-ASCII only (and therefore English-only)
|
||||||
sentence_end_re = re.compile(r'[%s]' # lowercase letter
|
sentence_end_re = re.compile(r'[%s]' # lowercase letter
|
||||||
|
@ -102,7 +112,8 @@ class TextWrapper:
|
||||||
replace_whitespace=True,
|
replace_whitespace=True,
|
||||||
fix_sentence_endings=False,
|
fix_sentence_endings=False,
|
||||||
break_long_words=True,
|
break_long_words=True,
|
||||||
drop_whitespace=True):
|
drop_whitespace=True,
|
||||||
|
break_on_hyphens=True):
|
||||||
self.width = width
|
self.width = width
|
||||||
self.initial_indent = initial_indent
|
self.initial_indent = initial_indent
|
||||||
self.subsequent_indent = subsequent_indent
|
self.subsequent_indent = subsequent_indent
|
||||||
|
@ -111,6 +122,7 @@ class TextWrapper:
|
||||||
self.fix_sentence_endings = fix_sentence_endings
|
self.fix_sentence_endings = fix_sentence_endings
|
||||||
self.break_long_words = break_long_words
|
self.break_long_words = break_long_words
|
||||||
self.drop_whitespace = drop_whitespace
|
self.drop_whitespace = drop_whitespace
|
||||||
|
self.break_on_hyphens = break_on_hyphens
|
||||||
|
|
||||||
|
|
||||||
# -- Private methods -----------------------------------------------
|
# -- Private methods -----------------------------------------------
|
||||||
|
@ -143,8 +155,15 @@ class TextWrapper:
|
||||||
breaks into the following chunks:
|
breaks into the following chunks:
|
||||||
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
|
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
|
||||||
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
|
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
|
||||||
|
if break_on_hyphens is True, or into:
|
||||||
|
'Look,', ' ', 'goof-ball', ' ', '--', ' ',
|
||||||
|
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
|
||||||
|
otherwise.
|
||||||
"""
|
"""
|
||||||
chunks = self.wordsep_re.split(text)
|
if self.break_on_hyphens is True:
|
||||||
|
chunks = self.wordsep_re.split(text)
|
||||||
|
else:
|
||||||
|
chunks = self.wordsep_simple_re.split(text)
|
||||||
chunks = filter(None, chunks) # remove empty chunks
|
chunks = filter(None, chunks) # remove empty chunks
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,8 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
|
||||||
|
|
||||||
- The mhlib module has been deprecated for removal in Python 3.0.
|
- The mhlib module has been deprecated for removal in Python 3.0.
|
||||||
|
|
||||||
- The linuxaudiodev module has been deprecated for removal in Python 3.0.
|
- The linuxaudiodev module has been deprecated for removal in Python 3.0.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue