Issue #4163: Use unicode-friendly word splitting in the textwrap functions when given a unicode string.

2025-11-01 02:38:53 +00:00 · 2008-12-13 23:12:30 +00:00 · 2008-12-13 23:12:30 +00:00 · 74af3bbfbd
commit 74af3bbfbd
parent 9f35070a6b
3 changed files with 20 additions and 7 deletions
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -174,7 +174,7 @@ What a mess!
        text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
                "released on 1994-02-15.")

-        self.check_wrap(text, 30, ['Python 1.0.0 was released on',
+        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
                                   '1994-01-26.  Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
@@ -353,6 +353,14 @@ What a mess!
            otext = self.wrapper.fill(text)
            assert isinstance(otext, unicode)

+        def test_no_split_at_umlaut(self):
+            text = u"Die Empf\xe4nger-Auswahl"
+            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])
+
+        def test_umlaut_followed_by_dash(self):
+            text = u"aa \xe4\xe4-\xe4\xe4"
+            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])
+
    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments