mirror of
https://github.com/python/cpython.git
synced 2025-10-17 20:28:43 +00:00
Make chr() and ord() return/accept surrogate pairs in narrow builds.
The domain of chr() and the range of ord() are now always [0 ... 0x10FFFF].
This commit is contained in:
parent
49c12ac04e
commit
8ac004e698
3 changed files with 58 additions and 19 deletions
|
@ -169,15 +169,23 @@ class BuiltinTest(unittest.TestCase):
|
|||
self.assertEqual(chr(97), 'a')
|
||||
self.assertEqual(chr(0xff), '\xff')
|
||||
self.assertRaises(ValueError, chr, 1<<24)
|
||||
self.assertEqual(
|
||||
chr(sys.maxunicode),
|
||||
str(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
|
||||
)
|
||||
self.assertRaises(ValueError, chr, sys.maxunicode+1)
|
||||
self.assertEqual(chr(sys.maxunicode),
|
||||
str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
|
||||
'unicode-escape'))
|
||||
self.assertRaises(TypeError, chr)
|
||||
self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
|
||||
self.assertEqual(chr(0x00010000), "\U00010000")
|
||||
self.assertEqual(chr(0x00010001), "\U00010001")
|
||||
self.assertEqual(chr(0x000FFFFE), "\U000FFFFE")
|
||||
self.assertEqual(chr(0x000FFFFF), "\U000FFFFF")
|
||||
self.assertEqual(chr(0x00100000), "\U00100000")
|
||||
self.assertEqual(chr(0x00100001), "\U00100001")
|
||||
self.assertEqual(chr(0x0010FFFE), "\U0010FFFE")
|
||||
self.assertEqual(chr(0x0010FFFF), "\U0010FFFF")
|
||||
self.assertRaises(ValueError, chr, -1)
|
||||
self.assertRaises(ValueError, chr, 0x00110000)
|
||||
|
||||
def XXX_test_cmp(self):
|
||||
# cmp() is no longer supported
|
||||
def test_cmp(self):
|
||||
self.assertEqual(cmp(-1, 1), -1)
|
||||
self.assertEqual(cmp(1, -1), 1)
|
||||
self.assertEqual(cmp(1, 1), 0)
|
||||
|
@ -1288,6 +1296,17 @@ class BuiltinTest(unittest.TestCase):
|
|||
self.assertEqual(ord(chr(sys.maxunicode)), sys.maxunicode)
|
||||
self.assertRaises(TypeError, ord, 42)
|
||||
|
||||
self.assertEqual(ord(chr(0x10FFFF)), 0x10FFFF)
|
||||
self.assertEqual(ord("\U0000FFFF"), 0x0000FFFF)
|
||||
self.assertEqual(ord("\U00010000"), 0x00010000)
|
||||
self.assertEqual(ord("\U00010001"), 0x00010001)
|
||||
self.assertEqual(ord("\U000FFFFE"), 0x000FFFFE)
|
||||
self.assertEqual(ord("\U000FFFFF"), 0x000FFFFF)
|
||||
self.assertEqual(ord("\U00100000"), 0x00100000)
|
||||
self.assertEqual(ord("\U00100001"), 0x00100001)
|
||||
self.assertEqual(ord("\U0010FFFE"), 0x0010FFFE)
|
||||
self.assertEqual(ord("\U0010FFFF"), 0x0010FFFF)
|
||||
|
||||
def test_pow(self):
|
||||
self.assertEqual(pow(0,0), 1)
|
||||
self.assertEqual(pow(0,1), 0)
|
||||
|
|
|
@ -915,21 +915,20 @@ Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
|
|||
|
||||
PyObject *PyUnicode_FromOrdinal(int ordinal)
|
||||
{
|
||||
Py_UNICODE s[1];
|
||||
Py_UNICODE s[2];
|
||||
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (ordinal < 0 || ordinal > 0x10ffff) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"chr() arg not in range(0x110000) "
|
||||
"(wide Python build)");
|
||||
"chr() arg not in range(0x110000)");
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
if (ordinal < 0 || ordinal > 0xffff) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"chr() arg not in range(0x10000) "
|
||||
"(narrow Python build)");
|
||||
return NULL;
|
||||
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (ordinal > 0xffff) {
|
||||
ordinal -= 0x10000;
|
||||
s[0] = 0xD800 | (ordinal >> 10);
|
||||
s[1] = 0xDC00 | (ordinal & 0x3FF);
|
||||
return PyUnicode_FromUnicode(s, 2);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -317,7 +317,11 @@ builtin_chr(PyObject *self, PyObject *args)
|
|||
PyDoc_STRVAR(chr_doc,
|
||||
"chr(i) -> Unicode character\n\
|
||||
\n\
|
||||
Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.");
|
||||
Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
"\nIf 0x10000 <= i, a surrogate pair is returned."
|
||||
#endif
|
||||
);
|
||||
|
||||
|
||||
static PyObject *
|
||||
|
@ -1179,6 +1183,19 @@ builtin_ord(PyObject *self, PyObject* obj)
|
|||
ord = (long)*PyUnicode_AS_UNICODE(obj);
|
||||
return PyInt_FromLong(ord);
|
||||
}
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (size == 2) {
|
||||
/* Decode a valid surrogate pair */
|
||||
int c0 = PyUnicode_AS_UNICODE(obj)[0];
|
||||
int c1 = PyUnicode_AS_UNICODE(obj)[1];
|
||||
if (0xD800 <= c0 && c0 <= 0xDBFF &&
|
||||
0xDC00 <= c1 && c1 <= 0xDFFF) {
|
||||
ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
|
||||
0x00010000);
|
||||
return PyInt_FromLong(ord);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (PyBytes_Check(obj)) {
|
||||
/* XXX Hopefully this is temporary */
|
||||
|
@ -1205,7 +1222,11 @@ builtin_ord(PyObject *self, PyObject* obj)
|
|||
PyDoc_STRVAR(ord_doc,
|
||||
"ord(c) -> integer\n\
|
||||
\n\
|
||||
Return the integer ordinal of a one-character string.");
|
||||
Return the integer ordinal of a one-character string."
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
"\nA valid surrogate pair is also accepted."
|
||||
#endif
|
||||
);
|
||||
|
||||
|
||||
static PyObject *
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue