mirror of
https://github.com/python/cpython.git
synced 2025-09-16 13:47:31 +00:00
Issue #4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts each character outside the BMP to the appropriate surrogate pair. Thanks to Victor Stinner for the patch. (Backport of r70452 from py3k to trunk.)
This commit is contained in:
parent
eb15863a97
commit
6b265f1bf8
3 changed files with 105 additions and 0 deletions
|
@ -529,6 +529,60 @@ PyObject *PyUnicode_FromString(const char *u)
|
|||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
||||
#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
|
||||
# define CONVERT_WCHAR_TO_SURROGATES
|
||||
#endif
|
||||
|
||||
#ifdef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
|
||||
to convert from UTF32 to UTF16. */
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
PyUnicodeObject *unicode;
|
||||
register Py_ssize_t i;
|
||||
Py_ssize_t alloc;
|
||||
const wchar_t *orig_w;
|
||||
|
||||
if (w == NULL) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
alloc = size;
|
||||
orig_w = w;
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF)
|
||||
alloc++;
|
||||
w++;
|
||||
}
|
||||
w = orig_w;
|
||||
unicode = _PyUnicode_New(alloc);
|
||||
if (!unicode)
|
||||
return NULL;
|
||||
|
||||
/* Copy the wchar_t data into the new object */
|
||||
{
|
||||
register Py_UNICODE *u;
|
||||
u = PyUnicode_AS_UNICODE(unicode);
|
||||
for (i = size; i > 0; i--) {
|
||||
if (*w > 0xFFFF) {
|
||||
wchar_t ordinal = *w++;
|
||||
ordinal -= 0x10000;
|
||||
*u++ = 0xD800 | (ordinal >> 10);
|
||||
*u++ = 0xDC00 | (ordinal & 0x3FF);
|
||||
}
|
||||
else
|
||||
*u++ = *w++;
|
||||
}
|
||||
}
|
||||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
{
|
||||
|
@ -559,6 +613,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#endif /* CONVERT_WCHAR_TO_SURROGATES */
|
||||
|
||||
#undef CONVERT_WCHAR_TO_SURROGATES
|
||||
|
||||
static void
|
||||
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue