In wide builds, avoid storing high Unicode characters from source code as surrogate pairs

This is accomplished by decoding with UTF-32 instead of UTF-16 on all builds. The patch is by Adam Olsen.
2025-11-02 19:12:55 +00:00 · 2009-10-28 21:59:39 +00:00 · 2009-10-28 21:59:39 +00:00 · b2e796aa27
commit b2e796aa27
parent 7b1b094ff1
3 changed files with 25 additions and 9 deletions
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@ -36,6 +36,14 @@ class PEP263Test(unittest.TestCase):
        exec(c, d)
        self.assertEquals(d['\xc6'], '\xc6')

+    def test_issue3297(self):
+        # Issue 3297: the compiled source spells U+0001010F twice -- once as
+        # the literal character (the single-backslash escape is resolved
+        # before compile() sees the text) and once as a \U escape that the
+        # compiler itself must decode (the doubled backslash keeps it
+        # textual).  Both string constants must come out the same.
+        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+        d = {}
+        # Run the code object with d as its globals so that a and b land in d.
+        exec(c, d)
+        # Compare value, length and ascii() form of the two results.
+        self.assertEqual(d['a'], d['b'])
+        self.assertEqual(len(d['a']), len(d['b']))
+        self.assertEqual(ascii(d['a']), ascii(d['b']))
+
 def test_main():
    support.run_unittest(PEP263Test)