Fix GzipFile's handling of filenames given as bytes objects.

Add relevant tests for GzipFile, and also for BZ2File and LZMAFile.
2025-11-18 18:31:56 +00:00 · 2012-06-20 01:48:50 +02:00 · 2012-06-20 01:48:50 +02:00 · 10c8791978
commit 10c8791978
parent e67f48ce5e 103e8113e4
5 changed files with 59 additions and 4 deletions
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@ -182,9 +182,8 @@ class GzipFile(io.BufferedIOBase):
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
-            if hasattr(fileobj, 'name') and isinstance(fileobj.name, str):
+            filename = getattr(fileobj, 'name', '')
-                filename = fileobj.name
+            if not isinstance(filename, (str, bytes)):
-            else:
                filename = ''
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')
@ -258,7 +257,8 @@ class GzipFile(io.BufferedIOBase):
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
-            fname = fname.encode('latin-1')
+            if not isinstance(fname, bytes):
+                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@ -522,6 +522,21 @@ class BZ2FileTest(BaseTest):
        with BZ2File(self.filename) as bz2f:
            self.assertEqual(bz2f.read(), data1 + data2)
    def testOpenBytesFilename(self):
        # BZ2File should accept a path given as a bytes object.  The test
        # only applies when the temporary file name is pure ASCII, so that
        # the bytes and str spellings name the same file.
        text_path = self.filename
        try:
            raw_path = text_path.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        with BZ2File(raw_path, "wb") as writer:
            writer.write(self.DATA)
        # Read back through the bytes path first, then through the str path
        # as a sanity check that we are actually operating on the right file.
        for path in (raw_path, text_path):
            with BZ2File(path, "rb") as reader:
                self.assertEqual(reader.read(), self.DATA)
    # Tests for a BZ2File wrapping another file object:
    def testReadBytesIO(self):
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@ -355,6 +355,20 @@ class TestGzip(BaseTest):
            with gzip.GzipFile(fileobj=f, mode="w") as g:
                pass
    def test_bytes_filename(self):
        # GzipFile should accept a path given as a bytes object.  The test
        # only applies when the temporary file name is pure ASCII, so that
        # the bytes and str spellings name the same file.
        text_path = self.filename
        try:
            raw_path = text_path.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        payload = data1 * 50
        with gzip.GzipFile(raw_path, "wb") as writer:
            writer.write(payload)
        # Read back through the bytes path first, then through the str path
        # as a sanity check that we are actually operating on the right file.
        for path in (raw_path, text_path):
            with gzip.GzipFile(path, "rb") as reader:
                self.assertEqual(reader.read(), payload)
    # Testing compress/decompress shortcut functions
    def test_compress(self):
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@ -655,6 +655,16 @@ class FileTestCase(unittest.TestCase):
                self.assertEqual(f.read(), INPUT)
                self.assertEqual(f.read(), b"")
    def test_read_from_file_with_bytes_filename(self):
        # Open the file through a bytes spelling of TESTFN; this requires
        # the name to be representable in ASCII.
        try:
            raw_name = TESTFN.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        with TempFile(TESTFN, COMPRESSED_XZ), LZMAFile(raw_name) as reader:
            # The first read returns the whole payload; the second read
            # must report end-of-stream with an empty bytes object.
            self.assertEqual(reader.read(), INPUT)
            self.assertEqual(reader.read(), b"")
    def test_read_incomplete(self):
        with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f:
            self.assertRaises(EOFError, f.read)
@ -814,6 +824,20 @@ class FileTestCase(unittest.TestCase):
        finally:
            unlink(TESTFN)
    def test_write_to_file_with_bytes_filename(self):
        # Write through a bytes spelling of TESTFN; this requires the name
        # to be representable in ASCII.
        try:
            raw_name = TESTFN.encode("ascii")
        except UnicodeEncodeError:
            self.skipTest("Temporary file name needs to be ASCII")
        try:
            with LZMAFile(raw_name, "w") as writer:
                writer.write(INPUT)
            expected = lzma.compress(INPUT)
            # Inspect the raw bytes on disk via the str path and compare
            # them with a one-shot compression of the same input.
            with open(TESTFN, "rb") as raw_file:
                on_disk = raw_file.read()
            self.assertEqual(on_disk, expected)
        finally:
            # Always remove the file, whether or not the checks passed.
            unlink(TESTFN)
    def test_write_append_to_file(self):
        part1 = INPUT[:1024]
        part2 = INPUT[1024:1536]
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -31,6 +31,8 @@ Core and Builtins
 Library
 -------
 - Fix GzipFile's handling of filenames given as bytes objects.
 - Issue #14772: Return destination values from some shutil functions.
 - Issue #15064: Implement context manager protocol for multiprocessing types