bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)

_PyCoreConfig: Change the type of the filesystem_encoding, filesystem_errors,
stdio_encoding and stdio_errors fields from char* to wchar_t*.

Changes:

* PyInterpreterState: replace fscodec_initialized (int) with fs_codec
  structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
  functions.
* Add error_handler parameter to unicode_encode_locale()
  and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
  to _PyCoreConfig_DecodeLocale().
This commit is contained in:
Victor Stinner 2019-05-02 14:56:30 -04:00 committed by GitHub
parent 6ae2bbbdfc
commit 709d23dee6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 357 additions and 220 deletions

View file

@ -523,27 +523,7 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
/* Replace *config_str with a freshly allocated duplicate of str.

   A NULL str resets *config_str to NULL. The previous value of *config_str
   is released with PyMem_RawFree() only after the duplicate has been
   created, so *config_str is left untouched when the allocation fails.

   Return _Py_INIT_OK() on success, _Py_INIT_NO_MEMORY() if the string
   cannot be duplicated. */
_PyInitError
_PyCoreConfig_SetString(char **config_str, const char *str)
{
    char *copy = NULL;

    if (str != NULL) {
        copy = _PyMem_RawStrdup(str);
        if (copy == NULL) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    PyMem_RawFree(*config_str);
    *config_str = copy;
    return _Py_INIT_OK();
}
/* Copy str into *config_str (duplicate the string) */
_PyInitError
_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
{
wchar_t *str2;
if (str != NULL) {
@ -563,8 +543,8 @@ _PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
/* Decode str using Py_DecodeLocale() and set the result into *config_str */
static _PyInitError
_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
const char *decode_err_msg)
_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
const char *decode_err_msg)
{
wchar_t *str2;
if (str != NULL) {
@ -588,17 +568,15 @@ _PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
}
/* Convenience wrapper around _PyCoreConfig_SetWideStringFromStringErr():
   forwards config_str and str unchanged and supplies the generic
   "cannot decode string" text as the decode error message. */
_PyInitError
_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
{
return _PyCoreConfig_SetWideStringFromStringErr(
config_str, str, "cannot decode string");
}
#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
_PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
"cannot decode " NAME)
_PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
/* Forward config_str and str to the CONFIG_DECODE_LOCALE() macro with
   "string" as NAME, so the decode error message handed to
   _PyCoreConfig_DecodeLocaleErr() is "cannot decode string". */
_PyInitError
_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
{
return CONFIG_DECODE_LOCALE(config_str, str, "string");
}
_PyInitError
@ -608,16 +586,9 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
_PyCoreConfig_Clear(config);
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
#define COPY_STR_ATTR(ATTR) \
do { \
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
if (_Py_INIT_FAILED(err)) { \
return err; \
} \
} while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
if (_Py_INIT_FAILED(err)) { \
return err; \
} \
@ -676,10 +647,10 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
COPY_STR_ATTR(filesystem_encoding);
COPY_STR_ATTR(filesystem_errors);
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
COPY_WSTR_ATTR(filesystem_encoding);
COPY_WSTR_ATTR(filesystem_errors);
COPY_WSTR_ATTR(stdio_encoding);
COPY_WSTR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_stdio);
#endif
@ -692,7 +663,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(_init_main);
#undef COPY_ATTR
#undef COPY_STR_ATTR
#undef COPY_WSTR_ATTR
#undef COPY_WSTRLIST
return _Py_INIT_OK();
@ -721,16 +691,10 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
goto fail; \
} \
} while (0)
#define FROM_STRING(STR) \
((STR != NULL) ? \
PyUnicode_FromString(STR) \
: (Py_INCREF(Py_None), Py_None))
#define SET_ITEM_INT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
#define SET_ITEM_UINT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
#define SET_ITEM_STR(ATTR) \
SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
#define FROM_WSTRING(STR) \
((STR != NULL) ? \
PyUnicode_FromWideChar(STR, -1) \
@ -753,8 +717,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_INT(show_alloc_count);
SET_ITEM_INT(dump_refs);
SET_ITEM_INT(malloc_stats);
SET_ITEM_STR(filesystem_encoding);
SET_ITEM_STR(filesystem_errors);
SET_ITEM_WSTR(filesystem_encoding);
SET_ITEM_WSTR(filesystem_errors);
SET_ITEM_WSTR(pycache_prefix);
SET_ITEM_WSTR(program_name);
SET_ITEM_WSTRLIST(argv);
@ -783,8 +747,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_INT(quiet);
SET_ITEM_INT(user_site_directory);
SET_ITEM_INT(buffered_stdio);
SET_ITEM_STR(stdio_encoding);
SET_ITEM_STR(stdio_errors);
SET_ITEM_WSTR(stdio_encoding);
SET_ITEM_WSTR(stdio_errors);
#ifdef MS_WINDOWS
SET_ITEM_INT(legacy_windows_stdio);
#endif
@ -803,12 +767,10 @@ fail:
Py_DECREF(dict);
return NULL;
#undef FROM_STRING
#undef FROM_WSTRING
#undef SET_ITEM
#undef SET_ITEM_INT
#undef SET_ITEM_UINT
#undef SET_ITEM_STR
#undef SET_ITEM_WSTR
#undef SET_ITEM_WSTRLIST
}
@ -845,7 +807,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
return _Py_INIT_OK();
}
return _PyCoreConfig_SetWideString(dest, var);
return _PyCoreConfig_SetString(dest, var);
#else
const char *var = getenv(name);
if (!var || var[0] == '\0') {
@ -853,7 +815,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
return _Py_INIT_OK();
}
return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
#endif
}
@ -996,8 +958,7 @@ config_init_program_name(_PyCoreConfig *config)
/* Use argv[0] by default, if available */
if (config->program != NULL) {
err = _PyCoreConfig_SetWideString(&config->program_name,
config->program);
err = _PyCoreConfig_SetString(&config->program_name, config->program);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1010,7 +971,7 @@ config_init_program_name(_PyCoreConfig *config)
#else
const wchar_t *default_program_name = L"python3";
#endif
err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1025,8 +986,8 @@ config_init_executable(_PyCoreConfig *config)
/* If Py_SetProgramFullPath() was called, use its value */
const wchar_t *program_full_path = _Py_path_config.program_full_path;
if (program_full_path != NULL) {
_PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
program_full_path);
_PyInitError err = _PyCoreConfig_SetString(&config->executable,
program_full_path);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1051,7 +1012,7 @@ config_init_home(_PyCoreConfig *config)
/* If Py_SetPythonHome() was called, use its value */
wchar_t *home = _Py_path_config.home;
if (home) {
_PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
_PyInitError err = _PyCoreConfig_SetString(&config->home, home);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1280,7 +1241,7 @@ config_read_complex_options(_PyCoreConfig *config)
}
static const char *
static const wchar_t *
config_get_stdio_errors(const _PyCoreConfig *config)
{
#ifndef MS_WINDOWS
@ -1288,43 +1249,44 @@ config_get_stdio_errors(const _PyCoreConfig *config)
if (loc != NULL) {
/* surrogateescape is the default in the legacy C and POSIX locales */
if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
return "surrogateescape";
return L"surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
/* surrogateescape is the default in locale coercion target locales */
if (_Py_IsLocaleCoercionTarget(loc)) {
return "surrogateescape";
return L"surrogateescape";
}
#endif
}
return "strict";
return L"strict";
#else
/* On Windows, always use surrogateescape by default */
return "surrogateescape";
return L"surrogateescape";
#endif
}
static _PyInitError
config_get_locale_encoding(char **locale_encoding)
config_get_locale_encoding(wchar_t **locale_encoding)
{
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
#elif defined(_Py_FORCE_UTF8_LOCALE)
const char *encoding = "UTF-8";
return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
#else
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
return _Py_INIT_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
"nl_langinfo(CODESET)");
#endif
assert(*locale_encoding == NULL);
return _PyCoreConfig_SetString(locale_encoding, encoding);
}
@ -1337,16 +1299,18 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* If Py_SetStandardStreamEncoding() has been called, use these
parameters. */
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
_Py_StandardStreamEncoding);
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
_Py_StandardStreamEncoding,
"_Py_StandardStreamEncoding");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors,
_Py_StandardStreamErrors);
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
_Py_StandardStreamErrors,
"_Py_StandardStreamErrors");
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1359,11 +1323,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* PYTHONIOENCODING environment variable */
const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
if (opt) {
/* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
char *pythonioencoding = NULL;
err = _PyCoreConfig_SetString(&pythonioencoding, opt);
if (_Py_INIT_FAILED(err)) {
return err;
char *pythonioencoding = _PyMem_RawStrdup(opt);
if (pythonioencoding == NULL) {
return _Py_INIT_NO_MEMORY();
}
char *errors = strchr(pythonioencoding, ':');
@ -1378,8 +1340,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* Does PYTHONIOENCODING contain an encoding? */
if (pythonioencoding[0]) {
if (config->stdio_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
pythonioencoding);
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
pythonioencoding,
"PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
@ -1396,7 +1359,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
}
if (config->stdio_errors == NULL && errors != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
errors,
"PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
@ -1409,15 +1374,14 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* UTF-8 Mode uses UTF-8/surrogateescape */
if (preconfig->utf8_mode) {
if (config->stdio_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
"utf-8");
err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors,
"surrogateescape");
L"surrogateescape");
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1432,7 +1396,7 @@ config_init_stdio_encoding(_PyCoreConfig *config,
}
}
if (config->stdio_errors == NULL) {
const char *errors = config_get_stdio_errors(config);
const wchar_t *errors = config_get_stdio_errors(config);
assert(errors != NULL);
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
@ -1452,33 +1416,32 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
#else
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"mbcs");
L"mbcs");
}
else
#endif
if (preconfig->utf8_mode) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
L"utf-8");
}
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"ascii");
L"ascii");
}
#endif
else {
#ifdef MS_WINDOWS
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
L"utf-8");
#else
err = config_get_locale_encoding(&config->filesystem_encoding);
#endif
@ -1491,16 +1454,16 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
}
if (config->filesystem_errors == NULL) {
const char *errors;
const wchar_t *errors;
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
errors = "replace";
errors = L"replace";
}
else {
errors = "surrogatepass";
errors = L"surrogatepass";
}
#else
errors = "surrogateescape";
errors = L"surrogateescape";
#endif
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) {
@ -1745,8 +1708,8 @@ config_parse_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline,
|| wcscmp(_PyOS_optarg, L"never") == 0
|| wcscmp(_PyOS_optarg, L"default") == 0)
{
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
_PyOS_optarg);
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
_PyOS_optarg);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -2119,7 +2082,7 @@ config_read_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline)
}
if (config->check_hash_pycs_mode == NULL) {
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
if (_Py_INIT_FAILED(err)) {
goto done;
}