mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)
bpo-29240, bpo-32030: If the encoding changes (C locale coerced or UTF-8 Mode changed), Py_Main() now re-reads the configuration with the new encoding. Changes: * Add _Py_UnixMain() called by main(). * Rename pymain_free_pymain() to pymain_clear_pymain(); it can now be called multiple times. * Rename pymain_parse_cmdline_envvars() to pymain_read_conf(). * Py_Main() now clears orig_argc and orig_argv at exit. * Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is no longer any need for two copies of the wchar_t** argv. * _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn. * Py_UTF8Mode is now initialized to -1. * Locale coercion (PEP 538) now respects the -I and -E options.
This commit is contained in:
parent
e796b2fe26
commit
9454060e84
12 changed files with 325 additions and 215 deletions
|
@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
|
|||
int Py_HasFileSystemDefaultEncoding = 0;
|
||||
#endif
|
||||
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
|
||||
/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin
|
||||
and stdout error handler to "surrogateescape". */
|
||||
int Py_UTF8Mode = 0;
|
||||
/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change
|
||||
stdin and stdout error handler to "surrogateescape". It is equal to
|
||||
-1 by default: unknown, will be set by Py_Main() */
|
||||
int Py_UTF8Mode = -1;
|
||||
|
||||
_Py_IDENTIFIER(__builtins__);
|
||||
_Py_IDENTIFIER(__dict__);
|
||||
|
|
|
@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
|
|||
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
|
||||
#else
|
||||
if (Py_UTF8Mode) {
|
||||
if (Py_UTF8Mode == 1) {
|
||||
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
|
||||
}
|
||||
|
||||
|
@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
|||
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||
return _Py_EncodeLocaleUTF8(text, error_pos);
|
||||
#else /* __APPLE__ */
|
||||
if (Py_UTF8Mode) {
|
||||
if (Py_UTF8Mode == 1) {
|
||||
return _Py_EncodeLocaleUTF8(text, error_pos);
|
||||
}
|
||||
|
||||
|
|
|
@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING =
|
|||
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
||||
"locales is recommended.\n";
|
||||
|
||||
static int
|
||||
_legacy_locale_warnings_enabled(void)
|
||||
{
|
||||
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
|
||||
return (coerce_c_locale != NULL &&
|
||||
strncmp(coerce_c_locale, "warn", 5) == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
_emit_stderr_warning_for_legacy_locale(void)
|
||||
_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
|
||||
{
|
||||
if (_legacy_locale_warnings_enabled()) {
|
||||
if (core_config->coerce_c_locale_warn) {
|
||||
if (_Py_LegacyLocaleDetected()) {
|
||||
fprintf(stderr, "%s", _C_LOCALE_WARNING);
|
||||
}
|
||||
|
@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void)
|
|||
}
|
||||
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
static const char _C_LOCALE_COERCION_WARNING[] =
|
||||
static const char C_LOCALE_COERCION_WARNING[] =
|
||||
"Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
|
||||
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
||||
|
||||
static void
|
||||
_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
|
||||
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
|
||||
{
|
||||
const char *newloc = target->locale_name;
|
||||
|
||||
|
@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
|
|||
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
||||
return;
|
||||
}
|
||||
if (_legacy_locale_warnings_enabled()) {
|
||||
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
|
||||
if (config->coerce_c_locale_warn) {
|
||||
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
|
||||
}
|
||||
|
||||
/* Reconfigure with the overridden environment variables */
|
||||
|
@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
|
|||
#endif
|
||||
|
||||
void
|
||||
_Py_CoerceLegacyLocale(void)
|
||||
_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
|
||||
{
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
/* We ignore the Python -E and -I flags here, as the CLI needs to sort out
|
||||
* the locale settings *before* we try to do anything with the command
|
||||
* line arguments. For cross-platform debugging purposes, we also need
|
||||
* to give end users a way to force even scripts that are otherwise
|
||||
* isolated from their environment to use the legacy ASCII-centric C
|
||||
* locale.
|
||||
*
|
||||
* Ignoring -E and -I is safe from a security perspective, as we only use
|
||||
* the setting to turn *off* the implicit locale coercion, and anyone with
|
||||
* access to the process environment already has the ability to set
|
||||
* `LC_ALL=C` to override the C level locale settings anyway.
|
||||
*/
|
||||
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
|
||||
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
|
||||
/* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
|
||||
const char *locale_override = getenv("LC_ALL");
|
||||
if (locale_override == NULL || *locale_override == '\0') {
|
||||
/* LC_ALL is also not set (or is set to an empty string) */
|
||||
const _LocaleCoercionTarget *target = NULL;
|
||||
for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
||||
const char *new_locale = setlocale(LC_CTYPE,
|
||||
target->locale_name);
|
||||
if (new_locale != NULL) {
|
||||
const char *locale_override = getenv("LC_ALL");
|
||||
if (locale_override == NULL || *locale_override == '\0') {
|
||||
/* LC_ALL is also not set (or is set to an empty string) */
|
||||
const _LocaleCoercionTarget *target = NULL;
|
||||
for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
||||
const char *new_locale = setlocale(LC_CTYPE,
|
||||
target->locale_name);
|
||||
if (new_locale != NULL) {
|
||||
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
|
||||
defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
/* Also ensure that nl_langinfo works in this locale */
|
||||
char *codeset = nl_langinfo(CODESET);
|
||||
if (!codeset || *codeset == '\0') {
|
||||
/* CODESET is not set or empty, so skip coercion */
|
||||
new_locale = NULL;
|
||||
_Py_SetLocaleFromEnv(LC_CTYPE);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
/* Successfully configured locale, so make it the default */
|
||||
_coerce_default_locale_settings(target);
|
||||
return;
|
||||
defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
/* Also ensure that nl_langinfo works in this locale */
|
||||
char *codeset = nl_langinfo(CODESET);
|
||||
if (!codeset || *codeset == '\0') {
|
||||
/* CODESET is not set or empty, so skip coercion */
|
||||
new_locale = NULL;
|
||||
_Py_SetLocaleFromEnv(LC_CTYPE);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
/* Successfully configured locale, so make it the default */
|
||||
_coerce_default_locale_settings(config, target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config)
|
|||
the locale's charset without having to switch
|
||||
locales. */
|
||||
_Py_SetLocaleFromEnv(LC_CTYPE);
|
||||
_emit_stderr_warning_for_legacy_locale();
|
||||
_emit_stderr_warning_for_legacy_locale(core_config);
|
||||
#endif
|
||||
|
||||
err = _Py_HashRandomization_Init(core_config);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue