bpo-34485: Enhance init_sys_streams() (GH-8978)

Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing
_bootlocale and _locale modules at startup by default.

When the PYTHONIOENCODING environment variable only contains an encoding,
the error handler is now is now set explicitly to "strict".

Rename also get_default_standard_stream_error_handler() to
get_stdio_errors().

Reduce the buffer to format the "cpXXX" string (Windows locale encoding).
This commit is contained in:
Victor Stinner 2018-08-28 23:26:33 +02:00 committed by GitHub
parent d500e5307a
commit 9e4994d410
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 91 additions and 43 deletions

View file

@ -244,22 +244,26 @@ error:
return NULL;
}
static char*
get_locale_encoding(void)
static _PyInitError
get_locale_encoding(char **locale_encoding)
{
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
char* codeset = nl_langinfo(CODESET);
if (!codeset || codeset[0] == '\0') {
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
return NULL;
}
return get_codec_name(codeset);
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
#elif defined(__ANDROID__)
return get_codec_name("UTF-8");
const char *encoding = "UTF-8";
#else
PyErr_SetNone(PyExc_NotImplementedError);
return NULL;
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
#endif
*locale_encoding = _PyMem_RawStrdup(encoding);
if (*locale_encoding == NULL) {
return _Py_INIT_NO_MEMORY();
}
return _Py_INIT_OK();
}
static _PyInitError
@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
};
static const char *
get_default_standard_stream_error_handler(void)
get_stdio_errors(void)
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
#endif
}
/* Otherwise return NULL to request the typical default error handler */
return NULL;
return "strict";
}
#ifdef PY_COERCE_C_LOCALE
@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
Py_HasFileSystemDefaultEncoding = 1;
}
else {
Py_FileSystemDefaultEncoding = get_locale_encoding();
char *locale_encoding;
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
PyMem_RawFree(locale_encoding);
if (Py_FileSystemDefaultEncoding == NULL) {
return _Py_INIT_ERR("Unable to get the locale encoding");
return _Py_INIT_ERR("failed to get the Python codec "
"of the locale encoding");
}
Py_HasFileSystemDefaultEncoding = 0;
@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
PyObject * encoding_attr;
char *pythonioencoding = NULL;
const char *encoding, *errors;
char *locale_encoding = NULL;
char *codec_name = NULL;
_PyInitError res = _Py_INIT_OK();
/* Hack to avoid a nasty recursion issue when Python is invoked
@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
errors = err;
}
}
if (*pythonioencoding && !encoding) {
if (!encoding && *pythonioencoding) {
encoding = pythonioencoding;
if (!errors) {
errors = "strict";
}
}
}
else if (interp->core_config.utf8_mode) {
encoding = "utf-8";
errors = "surrogateescape";
if (interp->core_config.utf8_mode) {
if (!encoding) {
encoding = "utf-8";
}
if (!errors) {
errors = "surrogateescape";
}
}
if (!errors && !pythonioencoding) {
if (!errors) {
/* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler();
errors = get_stdio_errors();
}
}
if (encoding == NULL) {
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
encoding = locale_encoding;
}
codec_name = get_codec_name(encoding);
if (codec_name == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"failed to get the Python codec name "
"of stdio encoding");
goto error;
}
encoding = codec_name;
/* Set sys.stdin */
fd = fileno(stdin);
/* Under some conditions stdin, stdout and stderr may not be connected
@ -1928,6 +1966,8 @@ done:
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
PyMem_RawFree(locale_encoding);
PyMem_RawFree(codec_name);
PyMem_Free(pythonioencoding);
Py_XDECREF(bimod);
Py_XDECREF(iomod);