bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)

- removes PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled
  in, but are off by default
- adds PYTHONCOERCECLOCALE=warn runtime option to aid in
  debugging potentially locale related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including
Mac OS X), this also temporarily disables UTF-8 as a locale coercion
target, and skips testing the interpreter's behavior in the POSIX locale.
This commit is contained in:
Nick Coghlan 2017-06-18 12:29:42 +10:00 committed by GitHub
parent 6a98a04e21
commit eb81795d7d
5 changed files with 184 additions and 122 deletions

View file

@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void)
{
#ifndef MS_WINDOWS
/* On non-Windows systems, the C locale is considered a legacy locale */
/* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
* the POSIX locale as a simple alias for the C locale, so
* we may also want to check for that explicitly.
*/
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
#else
@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void)
#endif
}
/* Warning text written to stderr by _emit_stderr_warning_for_legacy_locale()
 * when legacy locale warnings are enabled and a legacy locale is detected.
 */
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
/* Report whether legacy locale warnings were requested at runtime.
 *
 * Returns 1 only when the PYTHONCOERCECLOCALE environment variable is set
 * to exactly "warn"; returns 0 when it is unset or holds any other value.
 */
static int
_legacy_locale_warnings_enabled(void)
{
    const char *env_setting = getenv("PYTHONCOERCECLOCALE");

    if (env_setting == NULL) {
        return 0;
    }
    /* Whole-string comparison: prefixes and longer values do not match. */
    return strcmp(env_setting, "warn") == 0;
}
/* Write the C locale compatibility warning to stderr.
 *
 * Nothing is printed unless _legacy_locale_warnings_enabled() reports that
 * warnings were requested and _Py_LegacyLocaleDetected() reports that a
 * legacy locale is in use. The checks run in that order.
 */
static void
_emit_stderr_warning_for_legacy_locale(void)
{
    if (!_legacy_locale_warnings_enabled()) {
        return;
    }
    if (!_Py_LegacyLocaleDetected()) {
        return;
    }
    fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
/* Describes one candidate locale that may be tried as a coercion target. */
typedef struct _CandidateLocale {
const char *locale_name; /* The locale to try as a coercion target */
} _LocaleCoercionTarget;
@ -371,10 +399,17 @@ typedef struct _CandidateLocale {
/* Table of candidate coercion target locales. The table ends with a
 * sentinel entry whose locale_name is NULL.
 * NOTE(review): "UTF-8" appears below both as a live entry and as a
 * commented-out entry - confirm which one is intended.
 */
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
{"C.UTF-8"},
{"C.utf8"},
{"UTF-8"},
/* {"UTF-8"}, */
{NULL}
};
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
* problems encountered on *BSD systems with the associated test cases.
* For additional details see:
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
* locale handling differences: https://bugs.python.org/issue30672
*/
static char *
get_default_standard_stream_error_handler(void)
{
@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
if (_legacy_locale_warnings_enabled()) {
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */
setlocale(LC_ALL, "");
@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void)
}
#ifdef PY_WARN_ON_C_LOCALE
/* Message written to stderr by _emit_stderr_warning_for_c_locale(). */
static const char *_C_LOCALE_WARNING =
    "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
    "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
    "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
    "locales is recommended.\n";

/* Emit the C locale warning on stderr.
 *
 * The warning is suppressed when PYTHONCOERCECLOCALE is set to exactly "0".
 * Otherwise it is printed only if _Py_LegacyLocaleDetected() reports that a
 * legacy locale is in use.
 */
static void
_emit_stderr_warning_for_c_locale(void)
{
    const char *env_setting = getenv("PYTHONCOERCECLOCALE");

    if (env_setting != NULL && strcmp(env_setting, "0") == 0) {
        return;
    }
    if (_Py_LegacyLocaleDetected()) {
        fprintf(stderr, "%s", _C_LOCALE_WARNING);
    }
}
#endif
/* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call.
@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
the locale's charset without having to switch
locales. */
setlocale(LC_CTYPE, "");
#ifdef PY_WARN_ON_C_LOCALE
_emit_stderr_warning_for_c_locale();
#endif
_emit_stderr_warning_for_legacy_locale();
#endif
#endif