mirror of
https://github.com/python/cpython.git
synced 2025-10-21 22:22:48 +00:00
bpo-30647: Check nl_langinfo(CODESET) in locale coercion (GH-2374)
On some versions of FreeBSD, setting the "UTF-8" locale succeeds, but a subsequent "nl_langinfo(CODESET)" call fails. Adding a check for this in the coercion logic means that coercion will happen on systems where this check succeeds, and will be skipped otherwise. That way, CPython should automatically adapt to changes in platform behaviour, rather than needing a new release to enable coercion at build time. This also allows UTF-8 to be re-enabled as a coercion target, restoring the locale coercion behaviour on Mac OS X.
This commit is contained in:
parent
f7d090c165
commit
18974c35ad
2 changed files with 29 additions and 15 deletions
|
@ -1,6 +1,7 @@
|
||||||
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
|
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
import locale
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import sysconfig
|
import sysconfig
|
||||||
|
@ -32,24 +33,34 @@ else:
|
||||||
|
|
||||||
# In order to get the warning messages to match up as expected, the candidate
|
# In order to get the warning messages to match up as expected, the candidate
|
||||||
# order here must match the target locale order in Python/pylifecycle.c
|
# order here must match the target locale order in Python/pylifecycle.c
|
||||||
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
|
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
|
||||||
|
|
||||||
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
|
|
||||||
# problems encountered on *BSD systems with those test cases
|
|
||||||
# For additional details see:
|
|
||||||
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
|
|
||||||
# locale handling differences: https://bugs.python.org/issue30672
|
|
||||||
|
|
||||||
# There's no reliable cross-platform way of checking locale alias
|
# There's no reliable cross-platform way of checking locale alias
|
||||||
# lists, so the only way of knowing which of these locales will work
|
# lists, so the only way of knowing which of these locales will work
|
||||||
# is to try them with locale.setlocale(). We do that in a subprocess
|
# is to try them with locale.setlocale(). We do that in a subprocess
|
||||||
# to avoid altering the locale of the test runner.
|
# to avoid altering the locale of the test runner.
|
||||||
|
#
|
||||||
|
# If the relevant locale module attributes exist, and we're not on a platform
|
||||||
|
# where we expect it to always succeed, we also check that
|
||||||
|
# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter
|
||||||
|
# will skip locale coercion for that particular target locale
|
||||||
|
_check_nl_langinfo_CODESET = bool(
|
||||||
|
sys.platform not in ("darwin", "linux") and
|
||||||
|
hasattr(locale, "nl_langinfo") and
|
||||||
|
hasattr(locale, "CODESET")
|
||||||
|
)
|
||||||
|
|
||||||
def _set_locale_in_subprocess(locale_name):
|
def _set_locale_in_subprocess(locale_name):
|
||||||
cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
|
cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
|
||||||
|
if _check_nl_langinfo_CODESET:
|
||||||
|
# If there's no valid CODESET, we expect coercion to be skipped
|
||||||
|
cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
|
||||||
cmd = cmd_fmt.format(locale_name)
|
cmd = cmd_fmt.format(locale_name)
|
||||||
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
|
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
|
||||||
return result.rc == 0
|
return result.rc == 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
|
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
|
||||||
_EncodingDetails = namedtuple("EncodingDetails", _fields)
|
_EncodingDetails = namedtuple("EncodingDetails", _fields)
|
||||||
|
|
||||||
|
|
|
@ -399,17 +399,10 @@ typedef struct _CandidateLocale {
|
||||||
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
||||||
{"C.UTF-8"},
|
{"C.UTF-8"},
|
||||||
{"C.utf8"},
|
{"C.utf8"},
|
||||||
/* {"UTF-8"}, */
|
{"UTF-8"},
|
||||||
{NULL}
|
{NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
|
|
||||||
* problems encountered on *BSD systems with those test cases
|
|
||||||
* For additional details see:
|
|
||||||
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
|
|
||||||
* locale handling differences: https://bugs.python.org/issue30672
|
|
||||||
*/
|
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
get_default_standard_stream_error_handler(void)
|
get_default_standard_stream_error_handler(void)
|
||||||
{
|
{
|
||||||
|
@ -490,6 +483,16 @@ _Py_CoerceLegacyLocale(void)
|
||||||
const char *new_locale = setlocale(LC_CTYPE,
|
const char *new_locale = setlocale(LC_CTYPE,
|
||||||
target->locale_name);
|
target->locale_name);
|
||||||
if (new_locale != NULL) {
|
if (new_locale != NULL) {
|
||||||
|
#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
|
/* Also ensure that nl_langinfo works in this locale */
|
||||||
|
char *codeset = nl_langinfo(CODESET);
|
||||||
|
if (!codeset || *codeset == '\0') {
|
||||||
|
/* CODESET is not set or empty, so skip coercion */
|
||||||
|
new_locale = NULL;
|
||||||
|
setlocale(LC_CTYPE, "");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
/* Successfully configured locale, so make it the default */
|
/* Successfully configured locale, so make it the default */
|
||||||
_coerce_default_locale_settings(target);
|
_coerce_default_locale_settings(target);
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue