Issue #16455: On FreeBSD and Solaris, if the locale is C, the

ASCII/surrogateescape codec is now used, instead of the locale encoding, to
decode the command line arguments. This change fixes inconsistencies with
os.fsencode() and os.fsdecode() because these operating systems announce an
ASCII locale encoding, whereas the ISO-8859-1 encoding is used in practice.
This commit is contained in:
Victor Stinner 2012-12-04 01:34:47 +01:00
parent ca9f8b21c8
commit d45c7f8d74
5 changed files with 242 additions and 41 deletions

View file

@ -2863,10 +2863,10 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
/* Convert encoding to lower case and replace '_' with '-' in order to
catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1),
1 on success. */
static int
normalize_encoding(const char *encoding,
char *lower,
size_t lower_len)
int
_Py_normalize_encoding(const char *encoding,
char *lower,
size_t lower_len)
{
const char *e;
char *l;
@ -2908,7 +2908,7 @@ PyUnicode_Decode(const char *s,
char lower[11]; /* Enough for any encoding shortcut */
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) {
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
@ -3110,7 +3110,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
*surrogateescape = 0;
return 0;
}
if (strcmp(errors, "surrogateescape") == 0) {
if (errors == "surrogateescape"
|| strcmp(errors, "surrogateescape") == 0) {
*surrogateescape = 1;
return 0;
}
@ -3148,7 +3149,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
}
if (surrogateescape) {
/* locale encoding with surrogateescape */
/* "surrogateescape" error handler */
char *str;
str = _Py_wchar2char(wstr, &error_pos);
@ -3168,6 +3169,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyMem_Free(str);
}
else {
/* strict mode */
size_t len, len2;
len = wcstombs(NULL, wstr, 0);
@ -3273,7 +3275,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if (_Py_normalize_encoding(encoding, lower, sizeof(lower))) {
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
{
@ -3413,8 +3415,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return NULL;
}
if (surrogateescape)
{
if (surrogateescape) {
/* "surrogateescape" error handler */
wstr = _Py_char2wchar(str, &wlen);
if (wstr == NULL) {
if (wlen == (size_t)-1)
@ -3428,6 +3430,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
PyMem_Free(wstr);
}
else {
/* strict mode */
#ifndef HAVE_BROKEN_MBSTOWCS
wlen = mbstowcs(NULL, str, 0);
#else
@ -3447,7 +3450,6 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return PyErr_NoMemory();
}
/* This shouldn't fail now */
wlen2 = mbstowcs(wstr, str, wlen+1);
if (wlen2 == (size_t)-1) {
if (wstr != smallbuf)