Issue 4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts
each character outside the BMP to the appropriate surrogate pair.

Thanks Victor Stinner for the patch.

(backport of r70452 from py3k to trunk)
This commit is contained in:
Mark Dickinson 2009-03-18 16:07:26 +00:00
parent eb15863a97
commit 6b265f1bf8
3 changed files with 105 additions and 0 deletions

View file

@ -529,6 +529,60 @@ PyObject *PyUnicode_FromString(const char *u)
#ifdef HAVE_WCHAR_H
/* When Py_UNICODE is 2 bytes wide but wchar_t is known to be 4 bytes wide,
   define CONVERT_WCHAR_TO_SURROGATES so that the PyUnicode_FromWideChar
   variant which splits values above 0xFFFF into two units is compiled. */
#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
# define CONVERT_WCHAR_TO_SURROGATES
#endif
#ifdef CONVERT_WCHAR_TO_SURROGATES
/* UTF-32 to UTF-16 flavour of PyUnicode_FromWideChar, used when
   sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2.

   Builds a new unicode object from the `size` wchar_t values starting at
   `w`.  Each value above 0xFFFF is stored as two Py_UNICODE units (one
   based on 0xD800 followed by one based on 0xDC00); any other value is
   copied through as a single unit.  The input values are not checked
   against any upper bound.

   Returns the new object, or NULL when `w` is NULL (after calling
   PyErr_BadInternalCall) or when _PyUnicode_New returns NULL. */
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
                                 Py_ssize_t size)
{
    PyUnicodeObject *result;
    Py_UNICODE *out;
    Py_ssize_t units;
    Py_ssize_t k;

    if (w == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    /* First pass: one output unit per input value, plus one extra for
       every value that has to be split into a pair. */
    units = size;
    for (k = 0; k < size; k++) {
        if (w[k] > 0xFFFF)
            units++;
    }

    result = _PyUnicode_New(units);
    if (result == NULL)
        return NULL;

    /* Second pass: write the units into the new object's buffer. */
    out = PyUnicode_AS_UNICODE(result);
    for (k = 0; k < size; k++) {
        wchar_t ch = w[k];

        if (ch > 0xFFFF) {
            /* Offset by 0x10000, then emit the upper bits followed by
               the low 10 bits. */
            ch -= 0x10000;
            *out++ = 0xD800 | (ch >> 10);
            *out++ = 0xDC00 | (ch & 0x3FF);
        }
        else {
            *out++ = ch;
        }
    }

    return (PyObject *)result;
}
#else
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
Py_ssize_t size)
{
@ -559,6 +613,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
return (PyObject *)unicode;
}
#endif /* CONVERT_WCHAR_TO_SURROGATES */
#undef CONVERT_WCHAR_TO_SURROGATES
static void
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
{