Another round on SF patch 618135: gzip.py and files > 2G

The last round boosted "the limit" from 2GB to 4GB.  This round gets
rid of the 4GB limit.  For files > 4GB, gzip stores just the last 32
bits of the file size, and now we play along with that too.  Tested
by hand (on a 6+GB file) on Win2K.

Boosting from 2GB to 4GB was arguably enough of "a bugfix".  Going beyond
that smells more like "new feature" to me.
This commit is contained in:
Tim Peters 2002-11-05 20:38:55 +00:00
parent cd8fdbb3fa
commit 9288f95cb5
2 changed files with 15 additions and 7 deletions

View file

@ -24,6 +24,10 @@ def U32(i):
i += 1L << 32
return i
def LOWU32(i):
"""Return the low-order 32 bits of an int, as a non-negative int."""
return i & 0xFFFFFFFFL
def write32(output, value):
    """Write value to output as a 4-byte little-endian signed integer."""
    packed = struct.pack("<l", value)
    output.write(packed)
@ -295,21 +299,22 @@ class GzipFile:
# We've read to the end of the file, so we have to rewind in order
# to reread the 8 bytes containing the CRC and the file size.
# We check that the computed CRC and size of the
# uncompressed data matches the stored values.
# uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32.
self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj)
isize = U32(read32(self.fileobj)) # may exceed 2GB
if U32(crc32) != U32(self.crc):
raise ValueError, "CRC check failed"
elif isize != self.size:
elif isize != LOWU32(self.size):
raise ValueError, "Incorrect length of data produced"
# NOTE(review): this is a diff rendering with +/- markers stripped; the two
# "may exceed 2GB" comment/write32u pairs below are the pre-patch (removed)
# and post-patch (added) versions of the same two lines, not duplicate code.
# Flush pending compressed data, then append the gzip trailer: CRC-32 of the
# uncompressed data followed by its length.  Per RFC 1952 the ISIZE trailer
# field holds the input size mod 2**32, hence LOWU32 in the patched version.
def close(self):
if self.mode == WRITE:
self.fileobj.write(self.compress.flush())
write32(self.fileobj, self.crc)
# self.size may exceed 2GB
write32u(self.fileobj, self.size)
# self.size may exceed 2GB, or even 4GB
write32u(self.fileobj, LOWU32(self.size))
self.fileobj = None
elif self.mode == READ:
self.fileobj = None