bpo-28180: Implementation for PEP 538 (#659)

- new PYTHONCOERCECLOCALE config setting
- coerces legacy C locale to C.UTF-8, C.utf8 or UTF-8 by default
- always uses C.UTF-8 on Android
- uses `surrogateescape` on stdin and stdout in the coercion
  target locales
- configure option to disable locale coercion at build time
- configure option to disable C locale warning at build time
This commit is contained in:
Nick Coghlan 2017-06-11 13:16:15 +10:00 committed by GitHub
parent 0afbabe245
commit 6ea4186de3
14 changed files with 699 additions and 55 deletions

View file

@ -15,6 +15,21 @@ wmain(int argc, wchar_t **argv)
}
#else
/* Access private pylifecycle helper API to better handle the legacy C locale
*
* The legacy C locale assumes ASCII as the default text encoding, which
* causes problems not only for the CPython runtime, but also other
* components like GNU readline.
*
* Accordingly, when the CLI detects it, it attempts to coerce it to a
* more capable UTF-8 based alternative.
*
* See the documentation of the PYTHONCOERCECLOCALE setting for more details.
*
*/
extern int _Py_LegacyLocaleDetected(void);
extern void _Py_CoerceLegacyLocale(void);
int
main(int argc, char **argv)
{
@ -25,7 +40,11 @@ main(int argc, char **argv)
char *oldloc;
/* Force malloc() allocator to bootstrap Python */
#ifdef Py_DEBUG
(void)_PyMem_SetupAllocators("malloc_debug");
# else
(void)_PyMem_SetupAllocators("malloc");
# endif
argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
@ -49,7 +68,21 @@ main(int argc, char **argv)
return 1;
}
#ifdef __ANDROID__
/* Passing "" to setlocale() on Android requests the C locale rather
* than checking environment variables, so request C.UTF-8 explicitly
*/
setlocale(LC_ALL, "C.UTF-8");
#else
/* Reconfigure the locale to the default for this process */
setlocale(LC_ALL, "");
#endif
if (_Py_LegacyLocaleDetected()) {
_Py_CoerceLegacyLocale();
}
/* Convert from char to wchar_t based on the locale settings */
for (i = 0; i < argc; i++) {
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
if (!argv_copy[i]) {
@ -70,7 +103,11 @@ main(int argc, char **argv)
/* Force again malloc() allocator to release memory blocks allocated
before Py_Main() */
#ifdef Py_DEBUG
(void)_PyMem_SetupAllocators("malloc_debug");
# else
(void)_PyMem_SetupAllocators("malloc");
# endif
for (i = 0; i < argc; i++) {
PyMem_RawFree(argv_copy2[i]);