mirror of
https://github.com/python/cpython.git
synced 2025-07-29 22:24:49 +00:00

If the nl_langinfo(CODESET) function returns an empty string, Python now uses UTF-8 as the filesystem encoding. In May 2010 (commit b744ba1d14), I modified Python to log a warning and use UTF-8 as the filesystem encoding (instead of None) if nl_langinfo(CODESET) returns an empty string.
In August 2020 (commit 94908bbc15), I modified Python startup to fail with a fatal error and a specific error message if nl_langinfo(CODESET) returns an empty string. The intent was to prevent guessing the encoding and also to investigate user configurations where this case happens. In 10 years (2010 to 2020), I saw zero user reports about the error message related to nl_langinfo(CODESET) returning an empty string.
Today, UTF-8 has become the de facto standard and it is safe to assume that the user expects UTF-8. For example, nl_langinfo(CODESET) can return an empty string on macOS if the LC_CTYPE locale is not supported, and UTF-8 is the default encoding on macOS. While this change is unlikely to affect anyone in practice, it should make UTF-8 lovers happy ;-) Also rewrite the documentation explaining how Python selects the filesystem encoding and error handler.
59 lines
1.4 KiB
C
59 lines
1.4 KiB
C
#ifndef Py_INTERNAL_FILEUTILS_H
|
|
#define Py_INTERNAL_FILEUTILS_H
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifndef Py_BUILD_CORE
|
|
# error "Py_BUILD_CORE must be defined to include this header"
|
|
#endif
|
|
|
|
#include <locale.h> /* struct lconv */
|
|
|
|
/* Exported int global; only declared here, defined elsewhere.
   NOTE(review): the name suggests a flag telling whether the filesystem
   encode-errors setting has been set -- confirm against the definition. */
PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors;
|
|
|
|
/* Decode the byte string `arg` (with its length passed as `arglen`) into a
   wide-character string, using the error handler `errors`.

   `wstr`, `wlen` and `reason` are pointer arguments.
   NOTE(review): they are presumably out-parameters receiving the decoded
   string, its length and an error description; the UTF-8 input encoding is
   inferred from the function name. The meaning of the int return value and
   the ownership of *wstr are not visible in this header -- confirm against
   the definition. */
PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
    const char *arg,
    Py_ssize_t arglen,
    wchar_t **wstr,
    size_t *wlen,
    const char **reason,
    _Py_error_handler errors);
|
|
|
|
/* Encode the wide-character string `text` into a byte string, using the
   error handler `errors`.

   `str`, `error_pos` and `reason` are pointer arguments.
   NOTE(review): they are presumably out-parameters receiving the encoded
   bytes, the position of an encoding error and an error description;
   `raw_malloc` presumably selects which allocator provides *str; the UTF-8
   output encoding is inferred from the function name. The meaning of the
   int return value is not visible in this header -- confirm against the
   definition. */
PyAPI_FUNC(int) _Py_EncodeUTF8Ex(
    const wchar_t *text,
    char **str,
    size_t *error_pos,
    const char **reason,
    int raw_malloc,
    _Py_error_handler errors);
|
|
|
|
/* Decode the byte string `arg` (with its length passed as `arglen`) and
   return the result as a wide-character string.

   NOTE(review): per the function name this presumably decodes UTF-8 with
   the "surrogateescape" error handler and stores the result length through
   `wlen`; the failure return value and who must free the returned buffer
   are not visible in this header -- confirm against the definition. */
PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
    const char *arg,
    Py_ssize_t arglen,
    size_t *wlen);
|
|
|
|
/* Query the "force ASCII" mode (the mode that _Py_ResetForceASCII() resets).
   NOTE(review): the encoding of the int result is not visible in this
   header; presumably non-zero means the mode is enabled -- confirm. */
PyAPI_FUNC(int) _Py_GetForceASCII(void);
|
|
|
|
/* Reset "force ASCII" mode (if it was initialized).
|
|
|
|
This function should be called when Python changes the LC_CTYPE locale,
|
|
so the "force ASCII" mode can be detected again on the new locale
|
|
encoding. */
|
|
PyAPI_FUNC(void) _Py_ResetForceASCII(void);
|
|
|
|
|
|
/* Takes a `struct lconv` (declared in <locale.h>) and two PyObject*
   pointer arguments named `decimal_point` and `thousands_sep`.

   NOTE(review): presumably converts the numeric fields of `lc` with those
   names into Python objects stored through the two out-parameters; the
   meaning of the int return value and the reference ownership of the
   stored objects are not visible in this header -- confirm against the
   definition. */
PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
    struct lconv *lc,
    PyObject **decimal_point,
    PyObject **thousands_sep);
|
|
|
|
/* Takes two ints, `first` and `last`, and returns nothing.
   NOTE(review): per the name this presumably closes the file descriptors
   in the range first..last; whether `last` is inclusive is not visible in
   this header -- confirm against the definition. */
PyAPI_FUNC(void) _Py_closerange(int first, int last);
|
|
|
|
/* Returns a wide-character string.
   NOTE(review): per the name this is presumably the name of the current
   locale encoding; the failure value and whether the caller must free the
   result are not visible in this header -- confirm against the definition. */
PyAPI_FUNC(wchar_t*) _Py_GetLocaleEncoding(void);
|
|
/* Returns a PyObject*.
   NOTE(review): per the name this is presumably the locale encoding name
   as a Python object; the failure value and reference ownership are not
   visible in this header -- confirm against the definition. */
PyAPI_FUNC(PyObject*) _Py_GetLocaleEncodingObject(void);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
#endif /* !Py_INTERNAL_FILEUTILS_H */
|