mirror of
https://github.com/python/cpython.git
synced 2025-08-27 12:16:04 +00:00
bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)
_PyCoreConfig_Read() is now responsible for choosing the filesystem encoding and error handler. When Py_Main() is used, the encoding is now chosen even before Py_Initialize() is called. _PyCoreConfig.filesystem_encoding is now the reference for the Python filesystem encoding, instead of Py_FileSystemDefaultEncoding. Changes: * Add filesystem_encoding and filesystem_errors to _PyCoreConfig. * _PyCoreConfig_Read() now reads the locale encoding for the filesystem encoding. * PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize() now use the interpreter configuration rather than the Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors global configuration variables. * Add the _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding() private functions so that Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors are only modified in coreconfig.c. * _Py_CoerceLegacyLocale() now takes an int rather than a _PyCoreConfig for the warning.
This commit is contained in:
parent
dfe0dc7453
commit
b2457efc78
12 changed files with 301 additions and 105 deletions
|
@ -5,6 +5,11 @@
|
|||
# include <langinfo.h>
|
||||
#endif
|
||||
|
||||
#include <locale.h> /* setlocale() */
|
||||
#ifdef HAVE_LANGINFO_H
|
||||
#include <langinfo.h> /* nl_langinfo(CODESET) */
|
||||
#endif
|
||||
|
||||
|
||||
#define DECODE_LOCALE_ERR(NAME, LEN) \
|
||||
(((LEN) == -2) \
|
||||
|
@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
|
|||
int Py_HasFileSystemDefaultEncoding = 0;
|
||||
#endif
|
||||
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
|
||||
static int _Py_HasFileSystemDefaultEncodeErrors = 1;
|
||||
|
||||
/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change
|
||||
stdin and stdout error handler to "surrogateescape". It is equal to
|
||||
-1 by default: unknown, will be set by Py_Main() */
|
||||
|
@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list)
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
_Py_ClearFileSystemEncoding(void)
|
||||
{
|
||||
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
||||
Py_FileSystemDefaultEncoding = NULL;
|
||||
}
|
||||
if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
|
||||
Py_FileSystemDefaultEncodeErrors = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
int
|
||||
_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
|
||||
{
|
||||
char *encoding2 = _PyMem_RawStrdup(encoding);
|
||||
if (encoding2 == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *errors2 = _PyMem_RawStrdup(errors);
|
||||
if (errors2 == NULL) {
|
||||
PyMem_RawFree(encoding2);
|
||||
return -1;
|
||||
}
|
||||
|
||||
_Py_ClearFileSystemEncoding();
|
||||
|
||||
Py_FileSystemDefaultEncoding = encoding2;
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
|
||||
Py_FileSystemDefaultEncodeErrors = errors2;
|
||||
_Py_HasFileSystemDefaultEncodeErrors = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Helper to allow an embedding application to override the normal
|
||||
* mechanism that attempts to figure out an appropriate IO encoding
|
||||
*/
|
||||
|
@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
|
|||
#endif
|
||||
CLEAR(config->base_exec_prefix);
|
||||
|
||||
CLEAR(config->filesystem_encoding);
|
||||
CLEAR(config->filesystem_errors);
|
||||
CLEAR(config->stdio_encoding);
|
||||
CLEAR(config->stdio_errors);
|
||||
#undef CLEAR
|
||||
|
@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(quiet);
|
||||
COPY_ATTR(user_site_directory);
|
||||
COPY_ATTR(buffered_stdio);
|
||||
COPY_STR_ATTR(filesystem_encoding);
|
||||
COPY_STR_ATTR(filesystem_errors);
|
||||
COPY_STR_ATTR(stdio_encoding);
|
||||
COPY_STR_ATTR(stdio_errors);
|
||||
#ifdef MS_WINDOWS
|
||||
|
@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(_frozen);
|
||||
|
||||
#undef COPY_ATTR
|
||||
#undef COPY_STR_ATTR
|
||||
#undef COPY_WSTR_ATTR
|
||||
#undef COPY_WSTRLIST
|
||||
return 0;
|
||||
|
@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
_PyInitError
|
||||
_Py_get_locale_encoding(char **locale_encoding)
|
||||
static _PyInitError
|
||||
get_locale_encoding(char **locale_encoding)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
char encoding[20];
|
||||
|
@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
|||
|
||||
/* Choose the default error handler based on the current locale. */
|
||||
if (config->stdio_encoding == NULL) {
|
||||
_PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
|
||||
_PyInitError err = get_locale_encoding(&config->stdio_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
config_init_fs_encoding(_PyCoreConfig *config)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
if (config->legacy_windows_fs_encoding) {
|
||||
/* Legacy Windows filesystem encoding: mbcs/replace */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
if (config->filesystem_errors == NULL) {
|
||||
config->filesystem_errors = _PyMem_RawStrdup("replace");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Windows defaults to utf-8/surrogatepass (PEP 529) */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
if (config->filesystem_errors == NULL) {
|
||||
config->filesystem_errors = _PyMem_RawStrdup("surrogatepass");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (config->utf8_mode) {
|
||||
/* UTF-8 Mode use: utf-8/surrogateescape */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
/* errors defaults to surrogateescape above */
|
||||
}
|
||||
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
/* macOS and Android use UTF-8, other platforms use
|
||||
the locale encoding. */
|
||||
char *locale_encoding;
|
||||
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||
locale_encoding = "UTF-8";
|
||||
#else
|
||||
_PyInitError err = get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
config->filesystem_encoding = _PyMem_RawStrdup(locale_encoding);
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
|
||||
if (config->filesystem_errors == NULL) {
|
||||
/* by default, use the "surrogateescape" error handler */
|
||||
config->filesystem_errors = _PyMem_RawStrdup("surrogateescape");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
/* Read configuration settings from standard locations
|
||||
*
|
||||
* This function doesn't make any changes to the interpreter state - it
|
||||
|
@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
config->argc = 0;
|
||||
}
|
||||
|
||||
/* Run the filesystem encoding setup when either field is still unset.
   config_init_fs_encoding() only fills fields that are NULL, so a value
   preset by the caller is kept. With "&&", a config that preset only one
   of the two fields would leave the other NULL. */
if (config->filesystem_encoding == NULL || config->filesystem_errors == NULL) {
    err = config_init_fs_encoding(config);
    if (_Py_INIT_FAILED(err)) {
        return err;
    }
}
|
||||
|
||||
err = config_init_stdio_encoding(config);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
|
@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
|
||||
assert(config->coerce_c_locale >= 0);
|
||||
assert(config->use_environment >= 0);
|
||||
assert(config->filesystem_encoding != NULL);
|
||||
assert(config->filesystem_errors != NULL);
|
||||
assert(config->stdio_encoding != NULL);
|
||||
assert(config->stdio_errors != NULL);
|
||||
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
|
|
@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] =
|
|||
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
||||
|
||||
static void
|
||||
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
|
||||
_coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target)
|
||||
{
|
||||
const char *newloc = target->locale_name;
|
||||
|
||||
|
@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
|||
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
||||
return;
|
||||
}
|
||||
if (config->coerce_c_locale_warn) {
|
||||
if (warn) {
|
||||
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
|
||||
}
|
||||
|
||||
|
@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
|||
#endif
|
||||
|
||||
void
|
||||
_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
|
||||
_Py_CoerceLegacyLocale(int warn)
|
||||
{
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
const char *locale_override = getenv("LC_ALL");
|
||||
|
@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET)
|
|||
}
|
||||
#endif
|
||||
/* Successfully configured locale, so make it the default */
|
||||
_coerce_default_locale_settings(config, target);
|
||||
_coerce_default_locale_settings(warn, target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1162,11 +1162,7 @@ Py_FinalizeEx(void)
|
|||
/* Cleanup Unicode implementation */
|
||||
_PyUnicode_Fini();
|
||||
|
||||
/* reset file system default encoding */
|
||||
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
||||
Py_FileSystemDefaultEncoding = NULL;
|
||||
}
|
||||
_Py_ClearFileSystemEncoding();
|
||||
|
||||
/* XXX Still allocated:
|
||||
- various static ad-hoc pointers to interned strings
|
||||
|
@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp)
|
|||
static _PyInitError
|
||||
initfsencoding(PyInterpreterState *interp)
|
||||
{
|
||||
PyObject *codec;
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
if (Py_LegacyWindowsFSEncodingFlag) {
|
||||
Py_FileSystemDefaultEncoding = "mbcs";
|
||||
Py_FileSystemDefaultEncodeErrors = "replace";
|
||||
}
|
||||
else {
|
||||
Py_FileSystemDefaultEncoding = "utf-8";
|
||||
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
|
||||
}
|
||||
#else
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
if (interp->core_config.utf8_mode) {
|
||||
Py_FileSystemDefaultEncoding = "utf-8";
|
||||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
else if (_Py_GetForceASCII()) {
|
||||
Py_FileSystemDefaultEncoding = "ascii";
|
||||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
else {
|
||||
extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
|
||||
|
||||
char *locale_encoding;
|
||||
_PyInitError err = _Py_get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
|
||||
PyMem_RawFree(locale_encoding);
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the locale encoding");
|
||||
}
|
||||
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* the encoding is mbcs, utf-8 or ascii */
|
||||
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
|
||||
if (!codec) {
|
||||
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||
if (encoding == NULL) {
|
||||
/* Such error can only occurs in critical situations: no more
|
||||
* memory, import a module of the standard library failed,
|
||||
* etc. */
|
||||
return _Py_INIT_ERR("unable to load the file system codec");
|
||||
memory, import a module of the standard library failed, etc. */
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the filesystem encoding");
|
||||
}
|
||||
Py_DECREF(codec);
|
||||
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
|
||||
/* PyUnicode can now use the Python codec rather than C implementation
|
||||
for the filesystem encoding */
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
|
|
@ -389,11 +389,9 @@ implementation."
|
|||
/* sys.getfilesystemencoding(): return the filesystem encoding stored in
   the current interpreter's core configuration as a str.

   Raises RuntimeError if the configuration has no encoding set. */
static PyObject *
sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    const _PyCoreConfig *config = &interp->core_config;

    /* Keep the historical error instead of handing NULL to
       PyUnicode_FromString(). */
    if (config->filesystem_encoding == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "filesystem encoding is not initialized");
        return NULL;
    }
    return PyUnicode_FromString(config->filesystem_encoding);
}
|
||||
|
||||
PyDoc_STRVAR(getfilesystemencoding_doc,
|
||||
|
@ -406,11 +404,9 @@ operating system filenames."
|
|||
/* sys.getfilesystemencodeerrors(): return the filesystem error handler
   stored in the current interpreter's core configuration as a str.

   Raises RuntimeError if the configuration has no error handler set. */
static PyObject *
sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    const _PyCoreConfig *config = &interp->core_config;

    /* Keep the historical error instead of handing NULL to
       PyUnicode_FromString(). */
    if (config->filesystem_errors == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "filesystem encoding is not initialized");
        return NULL;
    }
    return PyUnicode_FromString(config->filesystem_errors);
}
|
||||
|
||||
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
|
||||
|
@ -1150,8 +1146,30 @@ environment variable before launching Python."
|
|||
/* sys._enablelegacywindowsfsencoding(): switch the filesystem encoding of
   the current interpreter to mbcs/replace (PEP 529).

   The interpreter configuration is updated first, then the
   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
   globals are refreshed through _Py_SetFileSystemEncoding().

   Returns None; raises MemoryError on allocation failure. */
static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    _PyCoreConfig *config = &interp->core_config;

    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
    char *encoding = _PyMem_RawStrdup("mbcs");
    char *errors = _PyMem_RawStrdup("replace");
    if (encoding == NULL || errors == NULL) {
        /* The copies come from the "raw" allocator, so they must be
           released with PyMem_RawFree(), not PyMem_Free(). */
        PyMem_RawFree(encoding);
        PyMem_RawFree(errors);
        PyErr_NoMemory();
        return NULL;
    }

    /* The configuration takes ownership of the new strings. */
    PyMem_RawFree(config->filesystem_encoding);
    config->filesystem_encoding = encoding;
    PyMem_RawFree(config->filesystem_errors);
    config->filesystem_errors = errors;

    /* The globals are only written through _Py_SetFileSystemEncoding(),
       never pointed at string literals, because it frees the previous
       values. */
    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
                                  config->filesystem_errors) < 0) {
        PyErr_NoMemory();
        return NULL;
    }

    Py_RETURN_NONE;
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue