mirror of
https://github.com/python/cpython.git
synced 2025-11-25 04:34:37 +00:00
gh-60462: Fix locale.strxfrm() on Solaris (GH-138242)
It should interpret the result of wcsxfrm() as a sequence of abstract integers, not a sequence of Unicode code points or using other encoding scheme that does not preserve ordering.
This commit is contained in:
parent
2985c6345f
commit
482fd0c811
2 changed files with 49 additions and 1 deletions
|
|
@ -0,0 +1 @@
|
|||
Fix :func:`locale.strxfrm` on Solaris (and possibly other platforms).
|
||||
|
|
@ -483,7 +483,54 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str)
|
|||
goto exit;
|
||||
}
|
||||
}
|
||||
result = PyUnicode_FromWideChar(buf, n2);
|
||||
/* The result is just a sequence of integers, they are not necessary
|
||||
Unicode code points, so PyUnicode_FromWideChar() cannot be used
|
||||
here. For example, 0xD83D 0xDC0D should not be larger than 0xFF41.
|
||||
*/
|
||||
#if SIZEOF_WCHAR_T == 4
|
||||
{
|
||||
/* Some codes can exceed the range of Unicode code points
|
||||
(0 - 0x10FFFF), so they cannot be directly used in
|
||||
PyUnicode_FromKindAndData(). They should be first encoded in
|
||||
a way that preserves the lexicographical order.
|
||||
|
||||
Codes in the range 0-0xFFFF represent themself.
|
||||
Codes larger than 0xFFFF are encoded as a pair:
|
||||
* 0x1xxxx -- the highest 16 bits
|
||||
* 0x0xxxx -- the lowest 16 bits
|
||||
*/
|
||||
size_t n3 = 0;
|
||||
for (size_t i = 0; i < n2; i++) {
|
||||
if ((Py_UCS4)buf[i] > 0x10000u) {
|
||||
n3++;
|
||||
}
|
||||
}
|
||||
if (n3) {
|
||||
n3 += n2; // no integer overflow
|
||||
Py_UCS4 *buf2 = PyMem_New(Py_UCS4, n3);
|
||||
if (buf2 == NULL) {
|
||||
PyErr_NoMemory();
|
||||
goto exit;
|
||||
}
|
||||
size_t j = 0;
|
||||
for (size_t i = 0; i < n2; i++) {
|
||||
Py_UCS4 c = (Py_UCS4)buf[i];
|
||||
if (c > 0x10000u) {
|
||||
buf2[j++] = (c >> 16) | 0x10000u;
|
||||
buf2[j++] = c & 0xFFFFu;
|
||||
}
|
||||
else {
|
||||
buf2[j++] = c;
|
||||
}
|
||||
}
|
||||
assert(j == n3);
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf2, n3);
|
||||
PyMem_Free(buf2);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
result = PyUnicode_FromKindAndData(sizeof(wchar_t), buf, n2);
|
||||
exit:
|
||||
PyMem_Free(buf);
|
||||
PyMem_Free(s);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue