bpo-41486: Faster bz2/lzma/zlib via new output buffering (GH-21740)

Faster bz2/lzma/zlib via new output buffering. Also adds a .readall() method to the _compression.DecompressReader class to take best advantage of this in the consume-all-output-at-once scenario. Often a 5-20% speedup in common scenarios due to less data copying. Contributed by Ma Lin.
2025-10-10 00:43:41 +00:00 · 2021-04-28 14:58:54 +08:00 · 2021-04-28 14:58:54 +08:00 · f9bedb630e
commit f9bedb630e
parent a5e64444e6
7 changed files with 670 additions and 254 deletions
--- a/Lib/_compression.py
+++ b/Lib/_compression.py
@ -1,7 +1,7 @@
 """Internal classes used by the gzip, lzma and bz2 modules"""

 import io
-
+import sys

 BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE  # Compressed data read chunk size

@ -110,6 +110,16 @@ class DecompressReader(io.RawIOBase):
        self._pos += len(data)
        return data

+    def readall(self):
+        chunks = []
+        # Loop until read() returns nothing, then join the chunks.  sys.maxsize
+        # means the max length of output buffer is unlimited, so that the whole
+        # input buffer can be decompressed within one .decompress() call.
+        while data := self.read(sys.maxsize):
+            chunks.append(data)
+
+        return b"".join(chunks)
+
    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0)