bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)

This commit is contained in:
Serhiy Storchaka 2017-03-30 19:09:08 +03:00 committed by GitHub
parent d4edfc9abf
commit 06e522521c
2 changed files with 52 additions and 52 deletions

View file

@ -509,65 +509,63 @@ class ZipInfo (object):
return self.filename[-1] == '/' return self.filename[-1] == '/'
class _ZipDecrypter: # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
"""Class to handle decryption of files stored within a ZIP archive. # internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().
ZIP supports a password-based form of encryption. Even though known _crctable = None
plaintext attacks have been found against it, it is still useful def _gen_crc(crc):
to be able to get data out of such a file.
Usage:
zd = _ZipDecrypter(mypwd)
plain_char = zd(cypher_char)
plain_text = map(zd, cypher_text)
"""
def _GenerateCRCTable():
"""Generate a CRC-32 table.
ZIP encryption uses the CRC32 one-byte primitive for scrambling some
internal keys. We noticed that a direct implementation is faster than
relying on binascii.crc32().
"""
poly = 0xedb88320
table = [0] * 256
for i in range(256):
crc = i
for j in range(8): for j in range(8):
if crc & 1: if crc & 1:
crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly crc = (crc >> 1) ^ 0xEDB88320
else: else:
crc = ((crc >> 1) & 0x7FFFFFFF) crc >>= 1
table[i] = crc return crc
return table
crctable = None
def _crc32(self, ch, crc): # ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
# zd = _ZipDecrypter(mypwd)
# plain_bytes = zd(cypher_bytes)
def _ZipDecrypter(pwd):
key0 = 305419896
key1 = 591751049
key2 = 878082192
global _crctable
if _crctable is None:
_crctable = list(map(_gen_crc, range(256)))
crctable = _crctable
def crc32(ch, crc):
"""Compute the CRC32 primitive on one byte.""" """Compute the CRC32 primitive on one byte."""
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
def update_keys(c):
nonlocal key0, key1, key2
key0 = crc32(c, key0)
key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
key2 = crc32(key1 >> 24, key2)
def __init__(self, pwd):
if _ZipDecrypter.crctable is None:
_ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
self.key0 = 305419896
self.key1 = 591751049
self.key2 = 878082192
for p in pwd: for p in pwd:
self._UpdateKeys(p) update_keys(p)
def _UpdateKeys(self, c): def decrypter(data):
self.key0 = self._crc32(c, self.key0) """Decrypt a bytes object."""
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 result = bytearray()
self.key1 = (self.key1 * 134775813 + 1) & 4294967295 append = result.append
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) for c in data:
k = key2 | 2
c ^= ((k * (k^1)) >> 8) & 0xFF
update_keys(c)
append(c)
return bytes(result)
def __call__(self, c): return decrypter
"""Decrypt a single character."""
assert isinstance(c, int)
k = self.key2 | 2
c = c ^ (((k * (k^1)) >> 8) & 255)
self._UpdateKeys(c)
return c
class LZMACompressor: class LZMACompressor:
@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase):
raise EOFError raise EOFError
if self._decrypter is not None: if self._decrypter is not None:
data = bytes(map(self._decrypter, data)) data = self._decrypter(data)
return data return data
def close(self): def close(self):
@ -1411,7 +1409,7 @@ class ZipFile:
# or the MSB of the file time depending on the header type # or the MSB of the file time depending on the header type
# and is used to check the correctness of the password. # and is used to check the correctness of the password.
header = zef_file.read(12) header = zef_file.read(12)
h = list(map(zd, header[0:12])) h = zd(header[0:12])
if zinfo.flag_bits & 0x8: if zinfo.flag_bits & 0x8:
# compare against the file type from extended local headers # compare against the file type from extended local headers
check_byte = (zinfo._raw_time >> 8) & 0xff check_byte = (zinfo._raw_time >> 8) & 0xff

View file

@ -301,6 +301,8 @@ Extension Modules
Library Library
------- -------
- bpo-10030: Sped up reading encrypted ZIP files by 2 times.
- bpo-29204: Element.getiterator() and the html parameter of XMLParser() were - bpo-29204: Element.getiterator() and the html parameter of XMLParser() were
deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly). deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly).
Now using them emits a deprecation warning. Now using them emits a deprecation warning.