Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
(which may be ASCII if no locale environment variable is set), to avoid
inconsistencies with os.fsencode() and os.fsdecode() functions which are
already using UTF-8/surrogateescape.
This commit is contained in:
Victor Stinner 2012-12-03 12:47:59 +01:00
parent ce31f66a6d
commit 27b1ca29cc
4 changed files with 65 additions and 18 deletions

View file

@ -2792,7 +2792,10 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
#ifdef __APPLE__
/* Simplified UTF-8 decoder using surrogateescape error handler,
used to decode the command line arguments on Mac OS X. */
used to decode the command line arguments on Mac OS X.
Return a pointer to a newly allocated wide character string (use
PyMem_Free() to free the memory), or NULL on memory allocation error. */
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@ -2803,10 +2806,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
/* Note: size will always be longer than the resulting Unicode
character count */
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
PyErr_NoMemory();
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
return NULL;
}
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
if (!unicode)
return NULL;