mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with the os.fsencode() and os.fsdecode() functions, which already use UTF-8/surrogateescape.
This commit is contained in:
parent
ce31f66a6d
commit
27b1ca29cc
4 changed files with 65 additions and 18 deletions
|
@ -10,6 +10,12 @@ What's New in Python 3.2.4
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #16416: On Mac OS X, operating system data are now always
|
||||||
|
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
|
||||||
|
(which may be ASCII if no locale environment variable is set), to avoid
|
||||||
|
inconsistencies with the os.fsencode() and os.fsdecode() functions, which
|
||||||
|
already use UTF-8/surrogateescape.
|
||||||
|
|
||||||
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread.h
|
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread.h
|
||||||
|
|
||||||
- Issue #16306: Fix multiple error messages when unknown command line
|
- Issue #16306: Fix multiple error messages when unknown command line
|
||||||
|
|
|
@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#ifdef __APPLE__
|
|
||||||
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
@ -45,11 +41,7 @@ main(int argc, char **argv)
|
||||||
oldloc = strdup(setlocale(LC_ALL, NULL));
|
oldloc = strdup(setlocale(LC_ALL, NULL));
|
||||||
setlocale(LC_ALL, "");
|
setlocale(LC_ALL, "");
|
||||||
for (i = 0; i < argc; i++) {
|
for (i = 0; i < argc; i++) {
|
||||||
#ifdef __APPLE__
|
|
||||||
argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
|
|
||||||
#else
|
|
||||||
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
|
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
|
||||||
#endif
|
|
||||||
if (!argv_copy[i]) {
|
if (!argv_copy[i]) {
|
||||||
fprintf(stderr, "Fatal Python error: "
|
fprintf(stderr, "Fatal Python error: "
|
||||||
"unable to decode the command line argument #%i\n",
|
"unable to decode the command line argument #%i\n",
|
||||||
|
|
|
@ -2792,7 +2792,10 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
|
||||||
/* Simplified UTF-8 decoder using surrogateescape error handler,
|
/* Simplified UTF-8 decoder using surrogateescape error handler,
|
||||||
used to decode the command line arguments on Mac OS X. */
|
used to decode the command line arguments on Mac OS X.
|
||||||
|
|
||||||
|
Return a pointer to a newly allocated wide character string (use
|
||||||
|
PyMem_Free() to free the memory), or NULL on memory allocation error. */
|
||||||
|
|
||||||
wchar_t*
|
wchar_t*
|
||||||
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
|
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
|
||||||
|
@ -2803,10 +2806,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
|
||||||
|
|
||||||
/* Note: size will always be longer than the resulting Unicode
|
/* Note: size will always be longer than the resulting Unicode
|
||||||
character count */
|
character count */
|
||||||
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
|
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
|
||||||
PyErr_NoMemory();
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
|
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
|
||||||
if (!unicode)
|
if (!unicode)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
# include <windows.h>
|
# include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_STAT
|
#ifdef HAVE_STAT
|
||||||
|
|
||||||
/* Decode a byte string from the locale encoding with the
|
/* Decode a byte string from the locale encoding with the
|
||||||
|
@ -23,6 +27,17 @@
|
||||||
wchar_t*
|
wchar_t*
|
||||||
_Py_char2wchar(const char* arg, size_t *size)
|
_Py_char2wchar(const char* arg, size_t *size)
|
||||||
{
|
{
|
||||||
|
#ifdef __APPLE__
|
||||||
|
wchar_t *wstr;
|
||||||
|
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
|
||||||
|
if (size != NULL) {
|
||||||
|
if (wstr != NULL)
|
||||||
|
*size = wcslen(wstr);
|
||||||
|
else
|
||||||
|
*size = (size_t)-1;
|
||||||
|
}
|
||||||
|
return wstr;
|
||||||
|
#else
|
||||||
wchar_t *res;
|
wchar_t *res;
|
||||||
#ifdef HAVE_BROKEN_MBSTOWCS
|
#ifdef HAVE_BROKEN_MBSTOWCS
|
||||||
/* Some platforms have a broken implementation of
|
/* Some platforms have a broken implementation of
|
||||||
|
@ -107,7 +122,7 @@ _Py_char2wchar(const char* arg, size_t *size)
|
||||||
argsize -= converted;
|
argsize -= converted;
|
||||||
out++;
|
out++;
|
||||||
}
|
}
|
||||||
#else
|
#else /* HAVE_MBRTOWC */
|
||||||
/* Cannot use C locale for escaping; manually escape as if charset
|
/* Cannot use C locale for escaping; manually escape as if charset
|
||||||
is ASCII (i.e. escape all bytes > 128. This will still roundtrip
|
is ASCII (i.e. escape all bytes > 128. This will still roundtrip
|
||||||
correctly in the locale's charset, which must be an ASCII superset. */
|
correctly in the locale's charset, which must be an ASCII superset. */
|
||||||
|
@ -121,13 +136,14 @@ _Py_char2wchar(const char* arg, size_t *size)
|
||||||
else
|
else
|
||||||
*out++ = 0xdc00 + *in++;
|
*out++ = 0xdc00 + *in++;
|
||||||
*out = 0;
|
*out = 0;
|
||||||
#endif
|
#endif /* HAVE_MBRTOWC */
|
||||||
if (size != NULL)
|
if (size != NULL)
|
||||||
*size = out - res;
|
*size = out - res;
|
||||||
return res;
|
return res;
|
||||||
oom:
|
oom:
|
||||||
fprintf(stderr, "out of memory\n");
|
fprintf(stderr, "out of memory\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
#endif /* __APPLE__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode a (wide) character string to the locale encoding with the
|
/* Encode a (wide) character string to the locale encoding with the
|
||||||
|
@ -144,14 +160,42 @@ oom:
|
||||||
char*
|
char*
|
||||||
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
|
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
|
||||||
{
|
{
|
||||||
|
#ifdef __APPLE__
|
||||||
|
Py_ssize_t len;
|
||||||
|
PyObject *unicode, *bytes = NULL;
|
||||||
|
char *cpath;
|
||||||
|
|
||||||
|
unicode = PyUnicode_FromWideChar(text, wcslen(text));
|
||||||
|
if (unicode == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
if (bytes == NULL) {
|
||||||
|
PyErr_Clear();
|
||||||
|
if (error_pos != NULL)
|
||||||
|
*error_pos = (size_t)-1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = PyBytes_GET_SIZE(bytes);
|
||||||
|
cpath = PyMem_Malloc(len+1);
|
||||||
|
if (cpath == NULL) {
|
||||||
|
PyErr_Clear();
|
||||||
|
Py_DECREF(bytes);
|
||||||
|
if (error_pos != NULL)
|
||||||
|
*error_pos = (size_t)-1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
|
||||||
|
Py_DECREF(bytes);
|
||||||
|
return cpath;
|
||||||
|
#else /* __APPLE__ */
|
||||||
const size_t len = wcslen(text);
|
const size_t len = wcslen(text);
|
||||||
char *result = NULL, *bytes = NULL;
|
char *result = NULL, *bytes = NULL;
|
||||||
size_t i, size, converted;
|
size_t i, size, converted;
|
||||||
wchar_t c, buf[2];
|
wchar_t c, buf[2];
|
||||||
|
|
||||||
if (error_pos != NULL)
|
|
||||||
*error_pos = (size_t)-1;
|
|
||||||
|
|
||||||
/* The function works in two steps:
|
/* The function works in two steps:
|
||||||
1. compute the length of the output buffer in bytes (size)
|
1. compute the length of the output buffer in bytes (size)
|
||||||
2. outputs the bytes */
|
2. outputs the bytes */
|
||||||
|
@ -198,11 +242,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
|
||||||
|
|
||||||
size += 1; /* nul byte at the end */
|
size += 1; /* nul byte at the end */
|
||||||
result = PyMem_Malloc(size);
|
result = PyMem_Malloc(size);
|
||||||
if (result == NULL)
|
if (result == NULL) {
|
||||||
|
if (error_pos != NULL)
|
||||||
|
*error_pos = (size_t)-1;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
bytes = result;
|
bytes = result;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
#endif /* __APPLE__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* In principle, this should use HAVE__WSTAT, and _wstat
|
/* In principle, this should use HAVE__WSTAT, and _wstat
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue