bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)

This commit is contained in:
Serhiy Storchaka 2017-03-30 19:09:08 +03:00 committed by GitHub
parent d4edfc9abf
commit 06e522521c
2 changed files with 52 additions and 52 deletions

View file

@ -509,65 +509,63 @@ class ZipInfo (object):
return self.filename[-1] == '/' return self.filename[-1] == '/'
class _ZipDecrypter: # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
"""Class to handle decryption of files stored within a ZIP archive. # internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().
ZIP supports a password-based form of encryption. Even though known _crctable = None
plaintext attacks have been found against it, it is still useful def _gen_crc(crc):
to be able to get data out of such a file. for j in range(8):
if crc & 1:
crc = (crc >> 1) ^ 0xEDB88320
else:
crc >>= 1
return crc
Usage: # ZIP supports a password-based form of encryption. Even though known
zd = _ZipDecrypter(mypwd) # plaintext attacks have been found against it, it is still useful
plain_char = zd(cypher_char) # to be able to get data out of such a file.
plain_text = map(zd, cypher_text) #
""" # Usage:
# zd = _ZipDecrypter(mypwd)
# plain_bytes = zd(cypher_bytes)
def _GenerateCRCTable(): def _ZipDecrypter(pwd):
"""Generate a CRC-32 table. key0 = 305419896
key1 = 591751049
key2 = 878082192
ZIP encryption uses the CRC32 one-byte primitive for scrambling some global _crctable
internal keys. We noticed that a direct implementation is faster than if _crctable is None:
relying on binascii.crc32(). _crctable = list(map(_gen_crc, range(256)))
""" crctable = _crctable
poly = 0xedb88320
table = [0] * 256
for i in range(256):
crc = i
for j in range(8):
if crc & 1:
crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
else:
crc = ((crc >> 1) & 0x7FFFFFFF)
table[i] = crc
return table
crctable = None
def _crc32(self, ch, crc): def crc32(ch, crc):
"""Compute the CRC32 primitive on one byte.""" """Compute the CRC32 primitive on one byte."""
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
def __init__(self, pwd): def update_keys(c):
if _ZipDecrypter.crctable is None: nonlocal key0, key1, key2
_ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable() key0 = crc32(c, key0)
self.key0 = 305419896 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
self.key1 = 591751049 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
self.key2 = 878082192 key2 = crc32(key1 >> 24, key2)
for p in pwd:
self._UpdateKeys(p)
def _UpdateKeys(self, c): for p in pwd:
self.key0 = self._crc32(c, self.key0) update_keys(p)
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
def __call__(self, c): def decrypter(data):
"""Decrypt a single character.""" """Decrypt a bytes object."""
assert isinstance(c, int) result = bytearray()
k = self.key2 | 2 append = result.append
c = c ^ (((k * (k^1)) >> 8) & 255) for c in data:
self._UpdateKeys(c) k = key2 | 2
return c c ^= ((k * (k^1)) >> 8) & 0xFF
update_keys(c)
append(c)
return bytes(result)
return decrypter
class LZMACompressor: class LZMACompressor:
@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase):
raise EOFError raise EOFError
if self._decrypter is not None: if self._decrypter is not None:
data = bytes(map(self._decrypter, data)) data = self._decrypter(data)
return data return data
def close(self): def close(self):
@ -1411,7 +1409,7 @@ class ZipFile:
# or the MSB of the file time depending on the header type # or the MSB of the file time depending on the header type
# and is used to check the correctness of the password. # and is used to check the correctness of the password.
header = zef_file.read(12) header = zef_file.read(12)
h = list(map(zd, header[0:12])) h = zd(header[0:12])
if zinfo.flag_bits & 0x8: if zinfo.flag_bits & 0x8:
# compare against the file type from extended local headers # compare against the file type from extended local headers
check_byte = (zinfo._raw_time >> 8) & 0xff check_byte = (zinfo._raw_time >> 8) & 0xff

View file

@ -301,6 +301,8 @@ Extension Modules
Library Library
------- -------
- bpo-10030: Sped up reading encrypted ZIP files by 2 times.
- bpo-29204: Element.getiterator() and the html parameter of XMLParser() were - bpo-29204: Element.getiterator() and the html parameter of XMLParser() were
deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly). deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly).
Now using them emits a deprecation warning. Now using them emits a deprecation warning.