(Merge 3.2) Issue #16416: On Mac OS X, operating system data are now always

encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
(which may be ASCII if no locale environment variable is set), to avoid
inconsistencies with os.fsencode() and os.fsdecode() functions which are
already using UTF-8/surrogateescape.
This commit is contained in:
Victor Stinner 2012-12-03 12:48:53 +01:00
commit 2660e427d1
4 changed files with 65 additions and 18 deletions

View file

@@ -4809,7 +4809,10 @@ onError:
#ifdef __APPLE__
/* Simplified UTF-8 decoder using surrogateescape error handler,
used to decode the command line arguments on Mac OS X. */
used to decode the command line arguments on Mac OS X.
Return a pointer to a newly allocated wide character string (use
PyMem_Free() to free the memory), or NULL on memory allocation error. */
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -4820,10 +4823,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
/* Note: size will always be longer than the resulting Unicode
character count */
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
PyErr_NoMemory();
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
return NULL;
}
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
if (!unicode)
return NULL;