bpo-29240: PEP 540: Add a new UTF-8 Mode (#855)

* Add -X utf8 command line option, PYTHONUTF8 environment variable
  and a new sys.flags.utf8_mode flag.
* If the LC_CTYPE locale is "C" at startup, automatically enable the
  UTF-8 mode.
* Add _winapi.GetACP(). encodings._alias_mbcs() now calls
  _winapi.GetACP() to get the ANSI code page.
* locale.getpreferredencoding() now returns 'UTF-8' in the UTF-8
  mode. As a side effect, open() now uses the UTF-8 encoding by
  default in this mode.
* Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding
  in the UTF-8 Mode.
* Update subprocess._args_from_interpreter_flags() to handle -X utf8.
* Skip some tests relying on the current locale if the UTF-8 mode is
  enabled.
* Add test_utf8mode.py.
* _Py_DecodeUTF8_surrogateescape() gets a new optional parameter to
  also return the length (number of wide characters).
* pymain_get_global_config() and pymain_set_global_config() now
  always copy flag values, rather than only copying if the new value
  is greater than the old value.
This commit is contained in:
Victor Stinner 2017-12-13 12:29:09 +01:00 committed by GitHub
parent c3e070f849
commit 91106cd9ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 598 additions and 183 deletions

View file

@ -54,7 +54,7 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
static _PyInitError add_main_module(PyInterpreterState *interp);
static _PyInitError initfsencoding(PyInterpreterState *interp);
static _PyInitError initsite(void);
static _PyInitError init_sys_streams(void);
static _PyInitError init_sys_streams(PyInterpreterState *interp);
static _PyInitError initsigs(void);
static void call_py_exitfuncs(void);
static void wait_for_thread_shutdown(void);
@ -925,7 +925,7 @@ _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
return err;
}
err = init_sys_streams();
err = init_sys_streams(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1410,7 +1410,7 @@ new_interpreter(PyThreadState **tstate_p)
return err;
}
err = init_sys_streams();
err = init_sys_streams(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1558,7 +1558,13 @@ initfsencoding(PyInterpreterState *interp)
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
}
#else
if (Py_FileSystemDefaultEncoding == NULL) {
if (Py_FileSystemDefaultEncoding == NULL &&
interp->core_config.utf8_mode)
{
Py_FileSystemDefaultEncoding = "utf-8";
Py_HasFileSystemDefaultEncoding = 1;
}
else if (Py_FileSystemDefaultEncoding == NULL) {
Py_FileSystemDefaultEncoding = get_locale_encoding();
if (Py_FileSystemDefaultEncoding == NULL) {
return _Py_INIT_ERR("Unable to get the locale encoding");
@ -1749,7 +1755,7 @@ error:
/* Initialize sys.stdin, stdout, stderr and builtins.open */
static _PyInitError
init_sys_streams(void)
init_sys_streams(PyInterpreterState *interp)
{
PyObject *iomod = NULL, *wrapper;
PyObject *bimod = NULL;
@ -1794,10 +1800,10 @@ init_sys_streams(void)
encoding = _Py_StandardStreamEncoding;
errors = _Py_StandardStreamErrors;
if (!encoding || !errors) {
pythonioencoding = Py_GETENV("PYTHONIOENCODING");
if (pythonioencoding) {
char *opt = Py_GETENV("PYTHONIOENCODING");
if (opt && opt[0] != '\0') {
char *err;
pythonioencoding = _PyMem_Strdup(pythonioencoding);
pythonioencoding = _PyMem_Strdup(opt);
if (pythonioencoding == NULL) {
PyErr_NoMemory();
goto error;
@ -1814,7 +1820,12 @@ init_sys_streams(void)
encoding = pythonioencoding;
}
}
if (!errors && !(pythonioencoding && *pythonioencoding)) {
else if (interp->core_config.utf8_mode) {
encoding = "utf-8";
errors = "surrogateescape";
}
if (!errors && !pythonioencoding) {
/* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler();
}