Another round on SF patch 618135: gzip.py and files > 2G

The last round boosted "the limit" from 2GB to 4GB.  This round gets
rid of the 4GB limit.  For files > 4GB, gzip stores just the last 32
bits of the file size, and now we play along with that too.  Tested
by hand (on a 6+GB file) on Win2K.

Boosting from 2GB to 4GB was arguably enough of "a bugfix".  Going beyond
that smells more like "new feature" to me.
This commit is contained in:
Tim Peters 2002-11-05 20:38:55 +00:00
parent cd8fdbb3fa
commit 9288f95cb5
2 changed files with 15 additions and 7 deletions

View file

@ -24,6 +24,10 @@ def U32(i):
i += 1L << 32
return i
def LOWU32(i):
"""Return the low-order 32 bits of an int, as a non-negative int."""
return i & 0xFFFFFFFFL
def write32(output, value):
    """Write value to output as a 4-byte little-endian signed integer."""
    packed = struct.pack("<l", value)
    output.write(packed)
@ -295,21 +299,22 @@ class GzipFile:
# We've read to the end of the file, so we have to rewind in order
# to reread the 8 bytes containing the CRC and the file size.
# We check that the computed CRC and size of the
# uncompressed data matches the stored values.
# uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32.
self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj)
isize = U32(read32(self.fileobj)) # may exceed 2GB
if U32(crc32) != U32(self.crc):
raise ValueError, "CRC check failed"
elif isize != self.size:
elif isize != LOWU32(self.size):
raise ValueError, "Incorrect length of data produced"
# NOTE(review): this is a diff rendering with +/- markers stripped; the two
# "may exceed 2GB" comment/write32u pairs below are the pre-patch (removed)
# and post-patch (added) versions of the same two lines, not duplicate code.
# Flush pending compressed data, then append the gzip trailer: CRC-32 of the
# uncompressed data followed by its length.  Per RFC 1952 the ISIZE trailer
# field holds the input size mod 2**32, hence LOWU32 in the patched version.
def close(self):
if self.mode == WRITE:
self.fileobj.write(self.compress.flush())
write32(self.fileobj, self.crc)
# self.size may exceed 2GB
write32u(self.fileobj, self.size)
# self.size may exceed 2GB, or even 4GB
write32u(self.fileobj, LOWU32(self.size))
self.fileobj = None
elif self.mode == READ:
self.fileobj = None