#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).

The differences from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
  now return the correct value for large code points
- repr() may consider more characters as printable.
2025-11-02 03:01:58 +00:00 · 2010-08-18 20:44:58 +00:00 · 2010-08-18 20:44:58 +00:00 · 324ac65ceb
commit 324ac65ceb
parent 36e778ef02
7 changed files with 69 additions and 232 deletions
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1353,6 +1353,10 @@ class UnicodeTest(string_tests.CommonTest,
        self.assertEqual(repr(s1()), '\\n')
        self.assertEqual(repr(s2()), '\\n')

+    def test_printable_repr(self):
+        self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
+        self.assertEqual(repr('\U00011000'), "'\\U00011000'")     # nonprintable
+
    def test_expandtabs_overflows_gracefully(self):
        # This test only affects 32-bit platforms because expandtabs can only take
        # an int as the max value, not a 64-bit C long.  If expandtabs is changed