Normalize the encoding names for Latin-1 and UTF-8 to 'latin-1' and 'utf-8'.

These are optimized in the Python Unicode implementation to result in more direct processing, bypassing the codec registry.

Also see issue #11303.
2025-12-04 00:30:19 +00:00 · 2011-02-25 15:42:01 +00:00 · 2011-02-25 15:42:01 +00:00 · 8f36af7a4c
commit 8f36af7a4c
parent a391b11320
32 changed files with 84 additions and 79 deletions
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase):
        self._test_unicode_filename("utf7")

    def test_utf8_filename(self):
-        self._test_unicode_filename("utf8")
+        self._test_unicode_filename("utf-8")

    def _test_unicode_filename(self, encoding):
        tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
@@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
    def test_bad_pax_header(self):
        # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
        # without a hdrcharset=BINARY header.
-        for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
                ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
            with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                try:
@@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest):

    def test_binary_header(self):
        # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
-        for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
                ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
            with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                try: