mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX.
This commit is contained in:
parent
21786f5186
commit
315877dc36
3 changed files with 40 additions and 18 deletions
|
@@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
|
||||||
expected = None
|
expected = None
|
||||||
self.check_fsencoding(fs_encoding, expected)
|
self.check_fsencoding(fs_encoding, expected)
|
||||||
|
|
||||||
def c_locale_get_error_handler(self, isolated=False, encoding=None):
|
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
|
||||||
# Force the POSIX locale
|
# Force the POSIX locale
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env["LC_ALL"] = "C"
|
env["LC_ALL"] = locale
|
||||||
env["PYTHONCOERCECLOCALE"] = "0"
|
env["PYTHONCOERCECLOCALE"] = "0"
|
||||||
code = '\n'.join((
|
code = '\n'.join((
|
||||||
'import sys',
|
'import sys',
|
||||||
|
@@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
return stdout
|
return stdout
|
||||||
|
|
||||||
def test_c_locale_surrogateescape(self):
|
def check_locale_surrogateescape(self, locale):
|
||||||
out = self.c_locale_get_error_handler(isolated=True)
|
out = self.c_locale_get_error_handler(locale, isolated=True)
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: surrogateescape\n'
|
'stdin: surrogateescape\n'
|
||||||
'stdout: surrogateescape\n'
|
'stdout: surrogateescape\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
|
|
||||||
# replace the default error handler
|
# replace the default error handler
|
||||||
out = self.c_locale_get_error_handler(encoding=':ignore')
|
out = self.c_locale_get_error_handler(locale, encoding=':ignore')
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: ignore\n'
|
'stdin: ignore\n'
|
||||||
'stdout: ignore\n'
|
'stdout: ignore\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
|
|
||||||
# force the encoding
|
# force the encoding
|
||||||
out = self.c_locale_get_error_handler(encoding='iso8859-1')
|
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: strict\n'
|
'stdin: strict\n'
|
||||||
'stdout: strict\n'
|
'stdout: strict\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
out = self.c_locale_get_error_handler(encoding='iso8859-1:')
|
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: strict\n'
|
'stdin: strict\n'
|
||||||
'stdout: strict\n'
|
'stdout: strict\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
|
|
||||||
# have no any effect
|
# have no any effect
|
||||||
out = self.c_locale_get_error_handler(encoding=':')
|
out = self.c_locale_get_error_handler(locale, encoding=':')
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: surrogateescape\n'
|
'stdin: surrogateescape\n'
|
||||||
'stdout: surrogateescape\n'
|
'stdout: surrogateescape\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
out = self.c_locale_get_error_handler(encoding='')
|
out = self.c_locale_get_error_handler(locale, encoding='')
|
||||||
self.assertEqual(out,
|
self.assertEqual(out,
|
||||||
'stdin: surrogateescape\n'
|
'stdin: surrogateescape\n'
|
||||||
'stdout: surrogateescape\n'
|
'stdout: surrogateescape\n'
|
||||||
'stderr: backslashreplace\n')
|
'stderr: backslashreplace\n')
|
||||||
|
|
||||||
|
def test_c_locale_surrogateescape(self):
|
||||||
|
self.check_locale_surrogateescape('C')
|
||||||
|
|
||||||
|
def test_posix_locale_surrogateescape(self):
|
||||||
|
self.check_locale_surrogateescape('POSIX')
|
||||||
|
|
||||||
def test_implementation(self):
|
def test_implementation(self):
|
||||||
# This test applies to all implementations equally.
|
# This test applies to all implementations equally.
|
||||||
|
|
||||||
|
|
|
@@ -0,0 +1,3 @@
|
||||||
|
Standard streams like sys.stdout now use the "surrogateescape" error
|
||||||
|
handler, instead of "strict", on the POSIX locale (when the C locale is not
|
||||||
|
coerced and the UTF-8 Mode is disabled).
|
|
@@ -345,13 +345,13 @@ get_stdio_errors(void)
|
||||||
{
|
{
|
||||||
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||||
if (ctype_loc != NULL) {
|
if (ctype_loc != NULL) {
|
||||||
/* "surrogateescape" is the default in the legacy C locale */
|
/* surrogateescape is the default in the legacy C and POSIX locales */
|
||||||
if (strcmp(ctype_loc, "C") == 0) {
|
if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
|
||||||
return "surrogateescape";
|
return "surrogateescape";
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PY_COERCE_C_LOCALE
|
#ifdef PY_COERCE_C_LOCALE
|
||||||
/* "surrogateescape" is the default in locale coercion target locales */
|
/* surrogateescape is the default in locale coercion target locales */
|
||||||
const _LocaleCoercionTarget *target = NULL;
|
const _LocaleCoercionTarget *target = NULL;
|
||||||
for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
||||||
if (strcmp(ctype_loc, target->locale_name) == 0) {
|
if (strcmp(ctype_loc, target->locale_name) == 0) {
|
||||||
|
@@ -1791,15 +1791,28 @@ init_sys_streams(PyInterpreterState *interp)
|
||||||
if (err) {
|
if (err) {
|
||||||
*err = '\0';
|
*err = '\0';
|
||||||
err++;
|
err++;
|
||||||
if (*err && !errors) {
|
if (!err[0]) {
|
||||||
errors = err;
|
err = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!encoding && *pythonioencoding) {
|
|
||||||
encoding = pythonioencoding;
|
/* Does PYTHONIOENCODING contain an encoding? */
|
||||||
if (!errors) {
|
if (pythonioencoding[0]) {
|
||||||
errors = "strict";
|
if (!encoding) {
|
||||||
|
encoding = pythonioencoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the encoding is set but not the error handler,
|
||||||
|
use "strict" error handler by default.
|
||||||
|
PYTHONIOENCODING=latin1 behaves as
|
||||||
|
PYTHONIOENCODING=latin1:strict. */
|
||||||
|
if (!err) {
|
||||||
|
err = "strict";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!errors && err != NULL) {
|
||||||
|
errors = err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue