#1477: ur'\U0010FFFF' used to raise in narrow unicode builds.

Corrected the raw-unicode-escape codec to use UTF-16 surrogates in
this case, like the unicode-escape codec does.

Backport of r61793 and r61853
This commit is contained in:
Amaury Forgeot d'Arc 2008-03-24 21:16:28 +00:00
parent d9b9d680d5
commit a79e05097b
3 changed files with 64 additions and 6 deletions

View file

@ -736,12 +736,25 @@ class UnicodeTest(
print >>out, u'def\n'
def test_ucs4(self):
if sys.maxunicode == 0xFFFF:
return
x = u'\U00100000'
y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
self.assertEqual(x, y)
y = r'\U00100000'
x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
self.assertEqual(x, y)
y = r'\U00010000'
x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
self.assertEqual(x, y)
try:
'\U11111111'.decode("raw-unicode-escape")
except UnicodeDecodeError, e:
self.assertEqual(e.start, 0)
self.assertEqual(e.end, 10)
else:
self.fail("Should have raised UnicodeDecodeError")
def test_conversion(self):
# Make sure __unicode__() works properly
class Foo0: