bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972)

* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
  by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
  the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
  the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() also checks that the command line
  arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
  with the POSIX locale or the C locale.
This commit is contained in:
Victor Stinner 2018-08-28 12:35:44 +02:00 committed by GitHub
parent d658deac60
commit 5cb258950c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 11 deletions

View file

@ -1,5 +1,6 @@
#include "Python.h"
#include "internal/pystate.h"
#include <locale.h>
#define DECODE_LOCALE_ERR(NAME, LEN) \
@ -828,14 +829,21 @@ static void
config_init_locale(_PyCoreConfig *config)
{
/* Fill in the locale-dependent fields of `config`: utf8_mode and
   coerce_c_locale are each set to 1 only while their current value is
   negative, so a non-negative value already stored in the field is
   left untouched (presumably negative means "not configured yet" --
   TODO confirm against the _PyCoreConfig definition).

   NOTE(review): this listing looks like diff output rendered without
   its +/- markers; the first utf8_mode block below may be the
   pre-change code that the setlocale() check further down replaces.
   Confirm against the actual file before relying on it. */
if (_Py_LegacyLocaleDetected()) {
/* POSIX locale: enable C locale coercion and UTF-8 Mode */
if (config->utf8_mode < 0) {
config->utf8_mode = 1;
}
/* The C locale enables the C locale coercion (PEP 538) */
if (config->coerce_c_locale < 0) {
config->coerce_c_locale = 1;
}
}
#ifndef MS_WINDOWS
/* Passing NULL as the locale argument makes setlocale() only query the
   current LC_CTYPE locale name; the locale itself is not modified. */
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
/* If the query returns NULL the check is skipped and utf8_mode keeps
   whatever value it already has. */
if (ctype_loc != NULL
&& (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) {
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
if (config->utf8_mode < 0) {
config->utf8_mode = 1;
}
}
#endif
}

View file

@ -128,7 +128,7 @@ check_force_ascii(void)
loc = setlocale(LC_CTYPE, NULL);
if (loc == NULL)
goto error;
if (strcmp(loc, "C") != 0) {
if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
/* the LC_CTYPE locale is different than C */
return 0;
}