mirror of https://github.com/python/cpython.git (synced 2025-09-26 18:29:57 +00:00)
Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush().
Additionally, fix a bug where a MemoryError in allocating a bytes object could leave the decompressor object in an invalid state (with its unconsumed_tail member being NULL). Patch by Serhiy Storchaka.
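To make the behavioural change concrete, here is a minimal sketch (not part of the commit, with made-up sample data) of what decompressobj().flush() guarantees after this fix; it is close in spirit to the max_length branch of the updated test below:

import zlib

# Hypothetical sample data: a compressed stream followed by trailing bytes.
source = b'abcdefghijklmnopqrstuvwxyz'
remainder = b'0123456789'
blob = zlib.compress(source) + remainder

dco = zlib.decompressobj()
# Limit the output size so that part of the input stays buffered in
# unconsumed_tail rather than being decompressed immediately.
data = dco.decompress(blob, 5)
data += dco.flush()

assert data == source
# With this fix, flush() stores the bytes that follow the end of the
# compressed stream in unused_data and clears unconsumed_tail.
assert dco.unused_data == remainder
assert dco.unconsumed_tail == b''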
This commit is contained in:
parent 67f089f860
commit 252f4dc6c9

3 changed files with 80 additions and 56 deletions
Lib/test/test_zlib.py
@@ -429,15 +429,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     def test_decompress_unused_data(self):
         # Repeated calls to decompress() after EOF should accumulate data in
         # dco.unused_data, instead of just storing the arg to the last call.
-        x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
-        for step in 1, 2, 100:
-            dco = zlib.decompressobj()
-            data = b''.join(dco.decompress(x[i : i + step])
-                            for i in range(0, len(x), step))
-            data += dco.flush()
-            self.assertEqual(data, HAMLET_SCENE)
-            self.assertEqual(dco.unused_data, HAMLET_SCENE)
+        source = b'abcdefghijklmnopqrstuvwxyz'
+        remainder = b'0123456789'
+        y = zlib.compress(source)
+        x = y + remainder
+        for maxlen in 0, 1000:
+            for step in 1, 2, len(y), len(x):
+                dco = zlib.decompressobj()
+                data = b''
+                for i in range(0, len(x), step):
+                    if i < len(y):
+                        self.assertEqual(dco.unused_data, b'')
+                    if maxlen == 0:
+                        data += dco.decompress(x[i : i + step])
+                        self.assertEqual(dco.unconsumed_tail, b'')
+                    else:
+                        data += dco.decompress(
+                                dco.unconsumed_tail + x[i : i + step], maxlen)
+                data += dco.flush()
+                self.assertEqual(data, source)
+                self.assertEqual(dco.unconsumed_tail, b'')
+                self.assertEqual(dco.unused_data, remainder)
 
     if hasattr(zlib.decompressobj(), "copy"):
         def test_decompresscopy(self):
Misc/NEWS
@@ -140,9 +140,11 @@ Core and Builtins
 Library
 -------
 
-- Issue #16350: zlib.Decompress.decompress() now accumulates data from
+- Issue #16350: zlib.decompressobj().decompress() now accumulates data from
   successive calls after EOF in unused_data, instead of only saving the argument
-  to the last call. Patch by Serhiy Storchaka.
+  to the last call. decompressobj().flush() now correctly sets unused_data and
+  unconsumed_tail. A bug in the handling of MemoryError when setting the
+  unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
 
 - Issue #12759: sre_parse now raises a proper error when the name of the group
   is missing. Initial patch by Serhiy Storchaka.
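As a small illustration of the Issue #16350 entry above (a sketch with made-up data, not part of the patch): repeated decompress() calls after the end of the stream now build up unused_data instead of only keeping the last argument.

import zlib

dco = zlib.decompressobj()
dco.decompress(zlib.compress(b'payload'))   # reaches the end of the stream
dco.decompress(b'extra-1')                  # data fed in after EOF is not decompressed,
dco.decompress(b'extra-2')                  # it is collected in unused_data instead
assert dco.unused_data == b'extra-1extra-2'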
Modules/zlibmodule.c
@@ -467,6 +467,49 @@ PyZlib_objcompress(compobject *self, PyObject *args)
     return RetVal;
 }
 
+/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
+   self->unused_data or self->unconsumed_tail, as appropriate. */
+static int
+save_unconsumed_input(compobject *self, int err)
+{
+    if (err == Z_STREAM_END) {
+        /* The end of the compressed data has been reached. Store the leftover
+           input data in self->unused_data. */
+        if (self->zst.avail_in > 0) {
+            Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
+            Py_ssize_t new_size;
+            PyObject *new_data;
+            if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+                PyErr_NoMemory();
+                return -1;
+            }
+            new_size = old_size + self->zst.avail_in;
+            new_data = PyString_FromStringAndSize(NULL, new_size);
+            if (new_data == NULL)
+                return -1;
+            Py_MEMCPY(PyString_AS_STRING(new_data),
+                      PyString_AS_STRING(self->unused_data), old_size);
+            Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
+                      self->zst.next_in, self->zst.avail_in);
+            Py_DECREF(self->unused_data);
+            self->unused_data = new_data;
+            self->zst.avail_in = 0;
+        }
+    }
+    if (self->zst.avail_in > 0 || PyString_GET_SIZE(self->unconsumed_tail)) {
+        /* This code handles two distinct cases:
+           1. Output limit was reached. Save leftover input in unconsumed_tail.
+           2. All input data was consumed. Clear unconsumed_tail. */
+        PyObject *new_data = PyString_FromStringAndSize(
+                (char *)self->zst.next_in, self->zst.avail_in);
+        if (new_data == NULL)
+            return -1;
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = new_data;
+    }
+    return 0;
+}
+
 PyDoc_STRVAR(decomp_decompress__doc__,
 "decompress(data, max_length) -- Return a string containing the decompressed\n"
 "version of the data.\n"
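The helper above distinguishes two situations; here is a rough Python-level sketch of the resulting behaviour (sample data invented for illustration, not part of the commit):

import zlib

source = b'x' * 1000
blob = zlib.compress(source) + b'TRAILER'

dco = zlib.decompressobj()
# Output limit reached: the remaining *compressed* input is kept in
# unconsumed_tail so a later call can continue from it.
part = dco.decompress(blob, 10)
assert len(part) == 10 and dco.unconsumed_tail != b'' and dco.unused_data == b''

# End of stream reached: bytes that follow the compressed stream go to
# unused_data, and unconsumed_tail is cleared.
rest = dco.decompress(dco.unconsumed_tail)
assert part + rest == source
assert dco.unused_data == b'TRAILER' and dco.unconsumed_tail == b''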
@@ -541,60 +584,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
         Py_END_ALLOW_THREADS
     }
 
-    if(max_length) {
-        /* Not all of the compressed data could be accommodated in a buffer of
-           the specified size. Return the unconsumed tail in an attribute. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyString_FromStringAndSize((char *)self->zst.next_in,
-                                                           self->zst.avail_in);
-    }
-    else if (PyString_GET_SIZE(self->unconsumed_tail) > 0) {
-        /* All of the compressed data was consumed. Clear unconsumed_tail. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyString_FromStringAndSize("", 0);
-    }
-    if(!self->unconsumed_tail) {
+    if (save_unconsumed_input(self, err) < 0) {
         Py_DECREF(RetVal);
         RetVal = NULL;
         goto error;
     }
 
-    /* The end of the compressed data has been reached, so set the
-       unused_data attribute to a string containing the remainder of the
-       data in the string. Note that this is also a logical place to call
-       inflateEnd, but the old behaviour of only calling it on flush() is
-       preserved.
-    */
-    if (err == Z_STREAM_END) {
-        if (self->zst.avail_in > 0) {
-            /* Append the leftover data to the existing value of unused_data. */
-            Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
-            Py_ssize_t new_size = old_size + self->zst.avail_in;
-            PyObject *new_data;
-            if (new_size <= old_size) { /* Check for overflow. */
-                PyErr_NoMemory();
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            new_data = PyString_FromStringAndSize(NULL, new_size);
-            if (new_data == NULL) {
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            Py_MEMCPY(PyString_AS_STRING(new_data),
-                      PyString_AS_STRING(self->unused_data), old_size);
-            Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
-                      self->zst.next_in, self->zst.avail_in);
-            Py_DECREF(self->unused_data);
-            self->unused_data = new_data;
-        }
+    /* This is the logical place to call inflateEnd, but the old behaviour of
+       only calling it on flush() is preserved. */
+
+    if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) {
     /* We will only get Z_BUF_ERROR if the output buffer was full
       but there wasn't more output when we tried again, so it is
       not an error condition.
    */
-    } else if (err != Z_OK && err != Z_BUF_ERROR) {
         zlib_error(self->zst, err, "while decompressing");
         Py_DECREF(RetVal);
         RetVal = NULL;
@@ -848,6 +851,12 @@ PyZlib_unflush(compobject *self, PyObject *args)
         Py_END_ALLOW_THREADS
     }
 
+    if (save_unconsumed_input(self, err) < 0) {
+        Py_DECREF(retval);
+        retval = NULL;
+        goto error;
+    }
+
     /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
        various data structures. Note we should only get Z_STREAM_END when
        flushmode is Z_FINISH */
@@ -861,6 +870,7 @@ PyZlib_unflush(compobject *self, PyObject *args)
             goto error;
         }
     }
+
     _PyString_Resize(&retval, self->zst.total_out - start_total_out);
 
 error: