mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
bpo-42236: Use UTF-8 encoding if nl_langinfo(CODESET) fails (GH-23086)
If the nl_langinfo(CODESET) function returns an empty string, Python now uses UTF-8 as the filesystem encoding. In May 2010 (commit b744ba1d14
), I modified Python to log a warning and use UTF-8 as the filesystem encoding (instead of None) if nl_langinfo(CODESET) returns an empty string. In August 2020 (commit 94908bbc15
), I modified Python startup to fail with a fatal error and a specific error message if nl_langinfo(CODESET) returns an empty string. The intent was to prevent guessing the encoding and also to investigate user configurations where this case happens. In 10 years (2010 to 2020), I saw zero user reports about the error message related to nl_langinfo(CODESET) returning an empty string. Today, UTF-8 has become the de facto standard and it's safe to assume that the user expects UTF-8. For example, nl_langinfo(CODESET) can return an empty string on macOS if the LC_CTYPE locale is not supported, and UTF-8 is the default encoding on macOS. While this change is unlikely to affect anyone in practice, it should make UTF-8 lovers happy ;-) Also rewrite the documentation explaining how Python selects the filesystem encoding and error handler.
This commit is contained in:
parent
82458b6cdb
commit
e662c398d8
8 changed files with 87 additions and 89 deletions
|
@ -826,20 +826,15 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
|
|||
// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
|
||||
// - Return "UTF-8" if the UTF-8 Mode is enabled
|
||||
// - On Windows, return the ANSI code page (ex: "cp1250")
|
||||
// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string
|
||||
// and if the _Py_FORCE_UTF8_FS_ENCODING macro is defined (ex: on macOS).
|
||||
// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
|
||||
// - Otherwise, return nl_langinfo(CODESET).
|
||||
//
|
||||
// Return NULL and set errmsg to an error message
|
||||
// if nl_langinfo(CODESET) fails.
|
||||
//
|
||||
// Return NULL and set errmsg to NULL on memory allocation failure.
|
||||
// Return NULL on memory allocation failure.
|
||||
//
|
||||
// See also config_get_locale_encoding()
|
||||
wchar_t*
|
||||
_Py_GetLocaleEncoding(const char **errmsg)
|
||||
_Py_GetLocaleEncoding(void)
|
||||
{
|
||||
*errmsg = NULL;
|
||||
#ifdef _Py_FORCE_UTF8_LOCALE
|
||||
// On Android langinfo.h and CODESET are missing,
|
||||
// and UTF-8 is always used in mbstowcs() and wcstombs().
|
||||
|
@ -859,21 +854,14 @@ _Py_GetLocaleEncoding(const char **errmsg)
|
|||
#else
|
||||
const char *encoding = nl_langinfo(CODESET);
|
||||
if (!encoding || encoding[0] == '\0') {
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
|
||||
// not supported. Default to UTF-8 in that case, because UTF-8 is the
|
||||
// default charset on macOS.
|
||||
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
|
||||
// macOS if the LC_CTYPE locale is not supported.
|
||||
return _PyMem_RawWcsdup(L"UTF-8");
|
||||
#else
|
||||
*errmsg = "failed to get the locale encoding: "
|
||||
"nl_langinfo(CODESET) returns an empty string";
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
wchar_t *wstr;
|
||||
int res = decode_current_locale(encoding, &wstr, NULL,
|
||||
errmsg, _Py_ERROR_SURROGATEESCAPE);
|
||||
NULL, _Py_ERROR_SURROGATEESCAPE);
|
||||
if (res < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -887,15 +875,9 @@ _Py_GetLocaleEncoding(const char **errmsg)
|
|||
PyObject *
|
||||
_Py_GetLocaleEncodingObject(void)
|
||||
{
|
||||
const char *errmsg;
|
||||
wchar_t *encoding = _Py_GetLocaleEncoding(&errmsg);
|
||||
wchar_t *encoding = _Py_GetLocaleEncoding();
|
||||
if (encoding == NULL) {
|
||||
if (errmsg != NULL) {
|
||||
PyErr_SetString(PyExc_ValueError, errmsg);
|
||||
}
|
||||
else {
|
||||
PyErr_NoMemory();
|
||||
}
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue