bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)

bpo-29240, bpo-32030: If the encoding change (C locale coerced or
UTF-8 Mode changed), Py_Main() now reads again the configuration with
the new encoding.

Changes:

* Add _Py_UnixMain() called by main().
* Rename pymain_free_pymain() to pymain_clear_pymain(), it can now be
  called multipled times.
* Rename pymain_parse_cmdline_envvars() to pymain_read_conf().
* Py_Main() now clears orig_argc and orig_argv at exit.
* Remove argv_copy2, Py_Main() doesn't modify argv anymore. There is
  no need anymore to get two copies of the wchar_t** argv.
* _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn.
* Py_UTF8Mode is now initialized to -1.
* Locale coercion (PEP 538) now respects -I and -E options.
This commit is contained in:
Victor Stinner 2017-12-16 04:54:22 +01:00 committed by GitHub
parent e796b2fe26
commit 9454060e84
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 325 additions and 215 deletions

View file

@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING =
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static int
_legacy_locale_warnings_enabled(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
return (coerce_c_locale != NULL &&
strncmp(coerce_c_locale, "warn", 5) == 0);
}
static void
_emit_stderr_warning_for_legacy_locale(void)
_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
{
if (_legacy_locale_warnings_enabled()) {
if (core_config->coerce_c_locale_warn) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void)
}
#ifdef PY_COERCE_C_LOCALE
static const char _C_LOCALE_COERCION_WARNING[] =
static const char C_LOCALE_COERCION_WARNING[] =
"Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
static void
_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
{
const char *newloc = target->locale_name;
@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
if (_legacy_locale_warnings_enabled()) {
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
if (config->coerce_c_locale_warn) {
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */
@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
#endif
void
_Py_CoerceLegacyLocale(void)
_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
{
#ifdef PY_COERCE_C_LOCALE
/* We ignore the Python -E and -I flags here, as the CLI needs to sort out
* the locale settings *before* we try to do anything with the command
* line arguments. For cross-platform debugging purposes, we also need
* to give end users a way to force even scripts that are otherwise
* isolated from their environment to use the legacy ASCII-centric C
* locale.
*
* Ignoring -E and -I is safe from a security perspective, as we only use
* the setting to turn *off* the implicit locale coercion, and anyone with
* access to the process environment already has the ability to set
* `LC_ALL=C` to override the C level locale settings anyway.
*/
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
/* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
const char *locale_override = getenv("LC_ALL");
if (locale_override == NULL || *locale_override == '\0') {
/* LC_ALL is also not set (or is set to an empty string) */
const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) {
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
const char *locale_override = getenv("LC_ALL");
if (locale_override == NULL || *locale_override == '\0') {
/* LC_ALL is also not set (or is set to an empty string) */
const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) {
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {
/* CODESET is not set or empty, so skip coercion */
new_locale = NULL;
_Py_SetLocaleFromEnv(LC_CTYPE);
continue;
}
#endif
/* Successfully configured locale, so make it the default */
_coerce_default_locale_settings(target);
return;
defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {
/* CODESET is not set or empty, so skip coercion */
new_locale = NULL;
_Py_SetLocaleFromEnv(LC_CTYPE);
continue;
}
#endif
/* Successfully configured locale, so make it the default */
_coerce_default_locale_settings(config, target);
return;
}
}
}
@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config)
the locale's charset without having to switch
locales. */
_Py_SetLocaleFromEnv(LC_CTYPE);
_emit_stderr_warning_for_legacy_locale();
_emit_stderr_warning_for_legacy_locale(core_config);
#endif
err = _Py_HashRandomization_Init(core_config);