#1477: ur'\U0010FFFF' raised an error in narrow unicode builds.

Corrected the raw-unicode-escape codec to use UTF-16 surrogates in this case, just like the unicode-escape codec.
2025-11-25 04:34:37 +00:00 · 2008-03-23 09:55:29 +00:00 · 2008-03-23 09:55:29 +00:00 · 9a0d3462fc
commit 9a0d3462fc
parent 61854332b9
3 changed files with 63 additions and 6 deletions
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -736,12 +736,25 @@ class UnicodeTest(
        print >>out, u'def\n'

    def test_ucs4(self):  # raw-unicode-escape round trips for code points above U+FFFF
-        if sys.maxunicode == 0xFFFF:
-            return
        x = u'\U00100000'
        y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
        self.assertEqual(x, y)  # unicode -> escaped bytes -> unicode must round-trip unchanged

+        y = r'\U00100000'
+        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
+        self.assertEqual(x, y)  # escaped bytes -> unicode -> escaped bytes must round-trip unchanged
+        y = r'\U00010000'
+        x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
+        self.assertEqual(x, y)
+
+        try:
+            '\U11111111'.decode("raw-unicode-escape")  # 0x11111111 exceeds 0x10FFFF; decoding is expected to fail
+        except UnicodeDecodeError as e:
+            self.assertEqual(e.start, 0)
+            self.assertEqual(e.end, 10)  # 10 == length of the \U11111111 escape text
+        else:
+            self.fail("Should have raised UnicodeDecodeError")
+
    def test_conversion(self):
        # Make sure __unicode__() works properly
        class Foo0: