mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 00:08:32 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			282 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			282 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
| Test the implementation of the PEP 540: the UTF-8 Mode.
 | |
| """
 | |
| 
 | |
| import locale
 | |
| import subprocess
 | |
| import sys
 | |
| import textwrap
 | |
| import unittest
 | |
| from test import support
 | |
| from test.support.script_helper import assert_python_ok, assert_python_failure
 | |
| from test.support import os_helper, MS_WINDOWS
 | |
| 
 | |
| 
 | |
# Locale names that the tests below treat as "the POSIX locale"; used both to
# detect the current LC_CTYPE locale and as LC_ALL values for child processes.
POSIX_LOCALES = ('C', 'POSIX')
# True when the test suite itself is running on VxWorks.
VXWORKS = (sys.platform == "vxworks")
 | |
| 
 | |
class UTF8ModeTests(unittest.TestCase):
    """Check the UTF-8 Mode (PEP 540) by running child Python interpreters.

    Every test goes through get_output(), which starts a child interpreter
    with DEFAULT_ENV (plus per-test overrides) and returns its decoded
    output, except test_device_encoding which uses subprocess directly so
    that the child inherits the parent's stdout.
    """

    # Baseline keyword arguments merged into each get_output() call and
    # forwarded to assert_python_ok() / assert_python_failure().
    # NOTE(review): those helpers are expected to treat keyword arguments as
    # environment variables of the child process -- see script_helper.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        current = locale.setlocale(locale.LC_CTYPE, None)
        return current in POSIX_LOCALES

    def get_output(self, *args, failure=False, **kw):
        """Run a child interpreter and return its decoded, stripped output.

        *args* are forwarded positionally and *kw* (layered on top of
        DEFAULT_ENV) as keyword arguments to the script_helper function.
        With failure=False, assert_python_ok() is used and item 1 of its
        result is returned; with failure=True, assert_python_failure() is
        used and item 2 is returned.  Trailing newline and carriage-return
        characters are removed.
        """
        env = {**self.DEFAULT_ENV, **kw}
        if failure:
            runner, index = assert_python_failure, 2
        else:
            runner, index = assert_python_ok, 1
        raw = runner(*args, **env)[index]
        return raw.decode().rstrip("\n\r")

    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        # Each POSIX locale is expected to switch the UTF-8 Mode on.
        script = 'import sys; print(sys.flags.utf8_mode)'

        for name in POSIX_LOCALES:
            with self.subTest(LC_ALL=name):
                self.assertEqual(
                    self.get_output('-c', script, LC_ALL=name), '1')

    def test_xoption(self):
        script = 'import sys; print(sys.flags.utf8_mode)'

        # (-X value, expected sys.flags.utf8_mode); "utf8=1" is an
        # undocumented but accepted syntax.
        cases = (
            ('utf8', '1'),
            ('utf8=1', '1'),
            ('utf8=0', '0'),
        )
        for xopt, flag in cases:
            self.assertEqual(self.get_output('-X', xopt, '-c', script), flag)

        if not MS_WINDOWS:
            return

        # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
        # and has the priority over -X utf8
        result = self.get_output('-X', 'utf8', '-c', script,
                                 PYTHONLEGACYWINDOWSFSENCODING='1')
        self.assertEqual(result, '0')

    def test_env_var(self):
        script = 'import sys; print(sys.flags.utf8_mode)'

        # PYTHONUTF8=1 / PYTHONUTF8=0 are reflected as-is in the flag.
        for value in ('1', '0'):
            self.assertEqual(
                self.get_output('-c', script, PYTHONUTF8=value), value)

        # -X utf8 has the priority over PYTHONUTF8
        self.assertEqual(
            self.get_output('-X', 'utf8=0', '-c', script, PYTHONUTF8='1'),
            '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            result = self.get_output('-X', 'utf8', '-c', script,
                                     PYTHONUTF8='1',
                                     PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(result, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            result = self.get_output('-E', '-c', script, PYTHONUTF8='1')
            self.assertEqual(result, '0')

        # invalid mode: the child must fail and report the bad value
        message = self.get_output('-c', script, PYTHONUTF8='xxx',
                                  failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      message.rstrip())

    def test_filesystemencoding(self):
        script = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        # Only the error handler differs between Windows and other platforms.
        expected = ('utf-8/surrogatepass' if MS_WINDOWS
                    else 'utf-8/surrogateescape')
        self.assertEqual(self.get_output('-X', 'utf8', '-c', script),
                         expected)

        if not MS_WINDOWS:
            return

        # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
        # and has the priority over -X utf8 and PYTHONUTF8
        result = self.get_output('-X', 'utf8', '-c', script,
                                 PYTHONUTF8='strict',
                                 PYTHONLEGACYWINDOWSFSENCODING='1')
        self.assertEqual(result, 'mbcs/replace')

    def test_stdio(self):
        script = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        def stream_report(ioencoding):
            # Lines printed by the child under -X utf8 with the given
            # PYTHONIOENCODING value.
            text = self.get_output('-X', 'utf8', '-c', script,
                                   PYTHONIOENCODING=ioencoding)
            return text.splitlines()

        self.assertEqual(stream_report(''),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        self.assertEqual(stream_report("latin1"),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        # Only the error handler is given: the encoding stays utf-8.
        self.assertEqual(stream_report(":namereplace"),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    def test_io(self):
        script = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')

        # This very file serves as an existing file for the child to open.
        result = self.get_output('-c', script, __file__, PYTHONUTF8='1')
        self.assertEqual(result.lower(), 'utf-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check encoding/errors of a file opened by *module*'s open().

        The child runs with PYTHONUTF8=1 and opens this test file, passing
        *encoding* and/or *errors* explicitly when they are truthy.  The
        reported "encoding/errors" pair (lower-cased) must match the explicit
        values, with 'utf-8' and 'strict' expected for the omitted ones.
        """
        # Keyword arguments spelled out in the child's open() call.
        open_kwargs = [
            f'{name}={value!r}'
            for name, value in (('encoding', encoding), ('errors', errors))
            if value
        ]
        script = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(open_kwargs))
        result = self.get_output('-c', script, __file__,
                                 PYTHONUTF8='1')

        expected_encoding = encoding or 'utf-8'
        expected_errors = errors or 'strict'
        self.assertEqual(result.lower(),
                         f'{expected_encoding}/{expected_errors}')

    def check_io_encoding(self, module):
        """Run _check_io_encoding() for encoding only, errors only, and both."""
        combinations = (
            {'encoding': "latin1"},
            {'errors': "namereplace"},
            {'encoding': "latin1", 'errors': "namereplace"},
        )
        for options in combinations:
            self._check_io_encoding(module, **options)

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    def test_locale_getpreferredencoding(self):
        script = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        expected = 'utf-8 utf-8'

        self.assertEqual(self.get_output('-X', 'utf8', '-c', script),
                         expected)

        # Same expectation when the child is forced into a POSIX locale.
        for name in POSIX_LOCALES:
            with self.subTest(LC_ALL=name):
                result = self.get_output('-X', 'utf8', '-c', script,
                                         LC_ALL=name)
                self.assertEqual(result, expected)

    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        # A non-ASCII command line argument, handed to the child as bytes.
        raw_arg = 'h\xe9\u20ac'.encode('utf-8')
        decoded_utf8 = raw_arg.decode('utf-8')
        decoded_ascii = raw_arg.decode('ascii', 'surrogateescape')
        script = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            # Compare only the part after "<encoding>:", i.e. the ascii()
            # form of the child's sys.argv[1:]; the full output is used as
            # the failure message.
            output = self.get_output('-X', utf8_opt, '-c', script, raw_arg,
                                     **kw)
            _, _, argv_repr = output.partition(':')
            self.assertEqual(argv_repr.rstrip(), ascii(expected), output)

        check('utf8', [decoded_utf8])
        for name in POSIX_LOCALES:
            with self.subTest(LC_ALL=name):
                check('utf8', [decoded_utf8], LC_ALL=name)

        # Expected decoding of the argument with the UTF-8 Mode disabled in
        # a POSIX locale; it depends on the platform running the tests.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            posix_arg = decoded_utf8
        elif sys.platform.startswith("aix"):
            posix_arg = raw_arg.decode('iso-8859-1')
        else:
            posix_arg = decoded_ascii
        for name in POSIX_LOCALES:
            with self.subTest(LC_ALL=name):
                check('utf8=0', [posix_arg], LC_ALL=name)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        script = 'import sys; print(sys.flags.optimize)'
        for option, level in (('-O', '1'), ('-OO', '2')):
            result = self.get_output('-X', 'utf8', option, '-c', script)
            self.assertEqual(result, level)

        # -E combined with -X utf8 must still be reported exactly once.
        script = 'import sys; print(sys.flags.ignore_environment)'
        result = self.get_output('-X', 'utf8', '-E', '-c', script)
        self.assertEqual(result, '1')

    @unittest.skipIf(MS_WINDOWS,
                     "os.device_encoding() doesn't implement "
                     "the UTF-8 Mode on Windows")
    @support.requires_subprocess()
    def test_device_encoding(self):
        # Use stdout as TTY
        if not sys.stdout.isatty():
            self.skipTest("sys.stdout is not a TTY")

        # The child cannot report through stdout (it must stay a TTY), so it
        # writes its result into this file instead.
        report_path = 'out.txt'
        self.addCleanup(os_helper.unlink, report_path)

        script = (f'import os, sys; fd = sys.stdout.fileno(); '
                  f'out = open({report_path!r}, "w", encoding="utf-8"); '
                  f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
                  f'out.close()')
        command = [sys.executable, '-X', 'utf8', '-c', script]
        # The stdout TTY is inherited to the child process
        completed = subprocess.run(command, text=True)
        self.assertEqual(completed.returncode, 0, completed)

        # In UTF-8 Mode, device_encoding(fd) is expected to return "utf-8"
        # if fd is a TTY
        with open(report_path, encoding="utf8") as report:
            content = report.read().rstrip()
        self.assertEqual(content, 'True utf-8')
 | |
| 
 | |
| 
 | |
# Run the tests of this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
 | 
