mirror of
https://github.com/python/cpython.git
synced 2025-08-28 20:56:54 +00:00
Issue 4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts each character outside the BMP to the appropriate surrogate pair. Thanks Victor Stinner for the patch.
This commit is contained in:
parent
ecdfd513a2
commit
081dfee4f1
3 changed files with 113 additions and 0 deletions
|
@ -561,6 +561,66 @@ PyObject *PyUnicode_FromString(const char *u)
|
|||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
||||
#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
|
||||
# define CONVERT_WCHAR_TO_SURROGATES
|
||||
#endif
|
||||
|
||||
#ifdef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
   to convert from UTF-32 to UTF-16. */
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
PyUnicodeObject *unicode;
|
||||
register Py_ssize_t i;
|
||||
Py_ssize_t alloc;
|
||||
const wchar_t *orig_w;
|
||||
|
||||
if (w == NULL) {
|
||||
if (size == 0)
|
||||
return PyUnicode_FromStringAndSize(NULL, 0);
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size == -1) {
|
||||
size = wcslen(w);
|
||||
}
|
||||
|
||||
alloc = size;
|
||||
orig_w = w;
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF)
|
||||
alloc++;
|
||||
w++;
|
||||
}
|
||||
w = orig_w;
|
||||
unicode = _PyUnicode_New(alloc);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
|
||||
/* Copy the wchar_t data into the new object */
|
||||
{
|
||||
register Py_UNICODE *u;
|
||||
u = PyUnicode_AS_UNICODE(unicode);
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF) {
|
||||
wchar_t ordinal = *w++;
|
||||
ordinal -= 0x10000;
|
||||
*u++ = 0xD800 | (ordinal >> 10);
|
||||
*u++ = 0xDC00 | (ordinal & 0x3FF);
|
||||
}
|
||||
else
|
||||
*u++ = *w++;
|
||||
}
|
||||
}
|
||||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
|
@ -597,6 +657,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#endif /* CONVERT_WCHAR_TO_SURROGATES */
|
||||
|
||||
#undef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
static void
|
||||
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue