bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)

The standard input and output streams (sys.stdin and sys.stdout) now use the
"surrogateescape" error handler, instead of "strict", in the POSIX locale
(when the C locale is not coerced and the UTF-8 Mode is disabled).
sys.stderr is unaffected and keeps using "backslashreplace".

Add tests for the error handlers of sys.stdin, sys.stdout and sys.stderr with LC_ALL=POSIX.
This commit is contained in:
Victor Stinner 2018-08-29 09:58:12 +02:00 committed by GitHub
parent 21786f5186
commit 315877dc36
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 18 deletions

View file

@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
expected = None expected = None
self.check_fsencoding(fs_encoding, expected) self.check_fsencoding(fs_encoding, expected)
def c_locale_get_error_handler(self, isolated=False, encoding=None): def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale # Force the POSIX locale
env = os.environ.copy() env = os.environ.copy()
env["LC_ALL"] = "C" env["LC_ALL"] = locale
env["PYTHONCOERCECLOCALE"] = "0" env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join(( code = '\n'.join((
'import sys', 'import sys',
@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
return stdout return stdout
def test_c_locale_surrogateescape(self): def check_locale_surrogateescape(self, locale):
out = self.c_locale_get_error_handler(isolated=True) out = self.c_locale_get_error_handler(locale, isolated=True)
self.assertEqual(out, self.assertEqual(out,
'stdin: surrogateescape\n' 'stdin: surrogateescape\n'
'stdout: surrogateescape\n' 'stdout: surrogateescape\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
# replace the default error handler # replace the default error handler
out = self.c_locale_get_error_handler(encoding=':ignore') out = self.c_locale_get_error_handler(locale, encoding=':ignore')
self.assertEqual(out, self.assertEqual(out,
'stdin: ignore\n' 'stdin: ignore\n'
'stdout: ignore\n' 'stdout: ignore\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
# force the encoding # force the encoding
out = self.c_locale_get_error_handler(encoding='iso8859-1') out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
self.assertEqual(out, self.assertEqual(out,
'stdin: strict\n' 'stdin: strict\n'
'stdout: strict\n' 'stdout: strict\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='iso8859-1:') out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
self.assertEqual(out, self.assertEqual(out,
'stdin: strict\n' 'stdin: strict\n'
'stdout: strict\n' 'stdout: strict\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
# has no effect # has no effect
out = self.c_locale_get_error_handler(encoding=':') out = self.c_locale_get_error_handler(locale, encoding=':')
self.assertEqual(out, self.assertEqual(out,
'stdin: surrogateescape\n' 'stdin: surrogateescape\n'
'stdout: surrogateescape\n' 'stdout: surrogateescape\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='') out = self.c_locale_get_error_handler(locale, encoding='')
self.assertEqual(out, self.assertEqual(out,
'stdin: surrogateescape\n' 'stdin: surrogateescape\n'
'stdout: surrogateescape\n' 'stdout: surrogateescape\n'
'stderr: backslashreplace\n') 'stderr: backslashreplace\n')
def test_c_locale_surrogateescape(self):
self.check_locale_surrogateescape('C')
def test_posix_locale_surrogateescape(self):
self.check_locale_surrogateescape('POSIX')
def test_implementation(self): def test_implementation(self):
# This test applies to all implementations equally. # This test applies to all implementations equally.

View file

@ -0,0 +1,3 @@
The standard input and output streams (sys.stdin and sys.stdout) now use the
"surrogateescape" error handler, instead of "strict", in the POSIX locale
(when the C locale is not coerced and the UTF-8 Mode is disabled).
sys.stderr is unaffected and keeps using "backslashreplace".

View file

@ -345,13 +345,13 @@ get_stdio_errors(void)
{ {
const char *ctype_loc = setlocale(LC_CTYPE, NULL); const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) { if (ctype_loc != NULL) {
/* "surrogateescape" is the default in the legacy C locale */ /* surrogateescape is the default in the legacy C and POSIX locales */
if (strcmp(ctype_loc, "C") == 0) { if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
return "surrogateescape"; return "surrogateescape";
} }
#ifdef PY_COERCE_C_LOCALE #ifdef PY_COERCE_C_LOCALE
/* "surrogateescape" is the default in locale coercion target locales */ /* surrogateescape is the default in locale coercion target locales */
const _LocaleCoercionTarget *target = NULL; const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) { for (target = _TARGET_LOCALES; target->locale_name; target++) {
if (strcmp(ctype_loc, target->locale_name) == 0) { if (strcmp(ctype_loc, target->locale_name) == 0) {
@ -1791,15 +1791,28 @@ init_sys_streams(PyInterpreterState *interp)
if (err) { if (err) {
*err = '\0'; *err = '\0';
err++; err++;
if (*err && !errors) { if (!err[0]) {
errors = err; err = NULL;
} }
} }
if (!encoding && *pythonioencoding) {
/* Does PYTHONIOENCODING contain an encoding? */
if (pythonioencoding[0]) {
if (!encoding) {
encoding = pythonioencoding; encoding = pythonioencoding;
if (!errors) {
errors = "strict";
} }
/* If the encoding is set but not the error handler,
use "strict" error handler by default.
PYTHONIOENCODING=latin1 behaves as
PYTHONIOENCODING=latin1:strict. */
if (!err) {
err = "strict";
}
}
if (!errors && err != NULL) {
errors = err;
} }
} }