mirror of https://github.com/python/cpython.git (synced 2025-09-26 18:29:57 +00:00)
Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush().
Additionally, fix a bug where a MemoryError in allocating a bytes object could leave the decompressor object in an invalid state (with its unconsumed_tail member being NULL). Patch by Serhiy Storchaka.
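To make the behavioural change concrete, here is a minimal sketch (not part of the commit, with made-up sample data) of what decompressobj().flush() guarantees after this fix; it is close in spirit to the max_length branch of the updated test below:

import zlib

# Hypothetical sample data: a compressed stream followed by trailing bytes.
source = b'abcdefghijklmnopqrstuvwxyz'
remainder = b'0123456789'
blob = zlib.compress(source) + remainder

dco = zlib.decompressobj()
# Limit the output size so that part of the input stays buffered in
# unconsumed_tail rather than being decompressed immediately.
data = dco.decompress(blob, 5)
data += dco.flush()

assert data == source
# With this fix, flush() stores the bytes that follow the end of the
# compressed stream in unused_data and clears unconsumed_tail.
assert dco.unused_data == remainder
assert dco.unconsumed_tail == b''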
This commit is contained in:
parent 67f089f860
commit 252f4dc6c9

3 changed files with 80 additions and 56 deletions
Lib/test/test_zlib.py
@@ -429,15 +429,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     def test_decompress_unused_data(self):
         # Repeated calls to decompress() after EOF should accumulate data in
         # dco.unused_data, instead of just storing the arg to the last call.
-        x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
-        for step in 1, 2, 100:
-            dco = zlib.decompressobj()
-            data = b''.join(dco.decompress(x[i : i + step])
-                            for i in range(0, len(x), step))
-            data += dco.flush()
-            self.assertEqual(data, HAMLET_SCENE)
-            self.assertEqual(dco.unused_data, HAMLET_SCENE)
+        source = b'abcdefghijklmnopqrstuvwxyz'
+        remainder = b'0123456789'
+        y = zlib.compress(source)
+        x = y + remainder
+        for maxlen in 0, 1000:
+            for step in 1, 2, len(y), len(x):
+                dco = zlib.decompressobj()
+                data = b''
+                for i in range(0, len(x), step):
+                    if i < len(y):
+                        self.assertEqual(dco.unused_data, b'')
+                    if maxlen == 0:
+                        data += dco.decompress(x[i : i + step])
+                        self.assertEqual(dco.unconsumed_tail, b'')
+                    else:
+                        data += dco.decompress(
+                                dco.unconsumed_tail + x[i : i + step], maxlen)
+                data += dco.flush()
+                self.assertEqual(data, source)
+                self.assertEqual(dco.unconsumed_tail, b'')
+                self.assertEqual(dco.unused_data, remainder)
 
     if hasattr(zlib.decompressobj(), "copy"):
         def test_decompresscopy(self):
Misc/NEWS
@@ -140,9 +140,11 @@ Core and Builtins
 Library
 -------
 
-- Issue #16350: zlib.Decompress.decompress() now accumulates data from
+- Issue #16350: zlib.decompressobj().decompress() now accumulates data from
   successive calls after EOF in unused_data, instead of only saving the argument
-  to the last call. Patch by Serhiy Storchaka.
+  to the last call. decompressobj().flush() now correctly sets unused_data and
+  unconsumed_tail. A bug in the handling of MemoryError when setting the
+  unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
 
 - Issue #12759: sre_parse now raises a proper error when the name of the group
   is missing. Initial patch by Serhiy Storchaka.
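As a small illustration of the Issue #16350 entry above (a sketch with made-up data, not part of the patch): repeated decompress() calls after the end of the stream now build up unused_data instead of only keeping the last argument.

import zlib

dco = zlib.decompressobj()
dco.decompress(zlib.compress(b'payload'))   # reaches the end of the stream
dco.decompress(b'extra-1')                  # data fed in after EOF is not decompressed,
dco.decompress(b'extra-2')                  # it is collected in unused_data instead
assert dco.unused_data == b'extra-1extra-2'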
Modules/zlibmodule.c
@@ -467,6 +467,49 @@ PyZlib_objcompress(compobject *self, PyObject *args)
     return RetVal;
 }
 
+/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
+   self->unused_data or self->unconsumed_tail, as appropriate. */
+static int
+save_unconsumed_input(compobject *self, int err)
+{
+    if (err == Z_STREAM_END) {
+        /* The end of the compressed data has been reached. Store the leftover
+           input data in self->unused_data. */
+        if (self->zst.avail_in > 0) {
+            Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
+            Py_ssize_t new_size;
+            PyObject *new_data;
+            if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+                PyErr_NoMemory();
+                return -1;
+            }
+            new_size = old_size + self->zst.avail_in;
+            new_data = PyString_FromStringAndSize(NULL, new_size);
+            if (new_data == NULL)
+                return -1;
+            Py_MEMCPY(PyString_AS_STRING(new_data),
+                      PyString_AS_STRING(self->unused_data), old_size);
+            Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
+                      self->zst.next_in, self->zst.avail_in);
+            Py_DECREF(self->unused_data);
+            self->unused_data = new_data;
+            self->zst.avail_in = 0;
+        }
+    }
+    if (self->zst.avail_in > 0 || PyString_GET_SIZE(self->unconsumed_tail)) {
+        /* This code handles two distinct cases:
+           1. Output limit was reached. Save leftover input in unconsumed_tail.
+           2. All input data was consumed. Clear unconsumed_tail. */
+        PyObject *new_data = PyString_FromStringAndSize(
+                (char *)self->zst.next_in, self->zst.avail_in);
+        if (new_data == NULL)
+            return -1;
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = new_data;
+    }
+    return 0;
+}
+
 PyDoc_STRVAR(decomp_decompress__doc__,
 "decompress(data, max_length) -- Return a string containing the decompressed\n"
 "version of the data.\n"
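The helper above distinguishes two situations; here is a rough Python-level sketch of the resulting behaviour (sample data invented for illustration, not part of the commit):

import zlib

source = b'x' * 1000
blob = zlib.compress(source) + b'TRAILER'

dco = zlib.decompressobj()
# Output limit reached: the remaining *compressed* input is kept in
# unconsumed_tail so a later call can continue from it.
part = dco.decompress(blob, 10)
assert len(part) == 10 and dco.unconsumed_tail != b'' and dco.unused_data == b''

# End of stream reached: bytes that follow the compressed stream go to
# unused_data, and unconsumed_tail is cleared.
rest = dco.decompress(dco.unconsumed_tail)
assert part + rest == source
assert dco.unused_data == b'TRAILER' and dco.unconsumed_tail == b''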
@@ -541,60 +584,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
         Py_END_ALLOW_THREADS
     }
 
-    if(max_length) {
-        /* Not all of the compressed data could be accommodated in a buffer of
-           the specified size. Return the unconsumed tail in an attribute. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyString_FromStringAndSize((char *)self->zst.next_in,
-                                                           self->zst.avail_in);
-    }
-    else if (PyString_GET_SIZE(self->unconsumed_tail) > 0) {
-        /* All of the compressed data was consumed. Clear unconsumed_tail. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyString_FromStringAndSize("", 0);
-    }
-    if(!self->unconsumed_tail) {
+    if (save_unconsumed_input(self, err) < 0) {
         Py_DECREF(RetVal);
         RetVal = NULL;
         goto error;
     }
 
-    /* The end of the compressed data has been reached, so set the
-       unused_data attribute to a string containing the remainder of the
-       data in the string. Note that this is also a logical place to call
-       inflateEnd, but the old behaviour of only calling it on flush() is
-       preserved.
-    */
-    if (err == Z_STREAM_END) {
-        if (self->zst.avail_in > 0) {
-            /* Append the leftover data to the existing value of unused_data. */
-            Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
-            Py_ssize_t new_size = old_size + self->zst.avail_in;
-            PyObject *new_data;
-            if (new_size <= old_size) { /* Check for overflow. */
-                PyErr_NoMemory();
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            new_data = PyString_FromStringAndSize(NULL, new_size);
-            if (new_data == NULL) {
-                Py_DECREF(RetVal);
-                RetVal = NULL;
-                goto error;
-            }
-            Py_MEMCPY(PyString_AS_STRING(new_data),
-                      PyString_AS_STRING(self->unused_data), old_size);
-            Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
-                      self->zst.next_in, self->zst.avail_in);
-            Py_DECREF(self->unused_data);
-            self->unused_data = new_data;
-        }
+    /* This is the logical place to call inflateEnd, but the old behaviour of
+       only calling it on flush() is preserved. */
+
+    if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) {
     /* We will only get Z_BUF_ERROR if the output buffer was full
       but there wasn't more output when we tried again, so it is
       not an error condition.
    */
-    } else if (err != Z_OK && err != Z_BUF_ERROR) {
         zlib_error(self->zst, err, "while decompressing");
         Py_DECREF(RetVal);
         RetVal = NULL;
@@ -848,6 +851,12 @@ PyZlib_unflush(compobject *self, PyObject *args)
         Py_END_ALLOW_THREADS
     }
 
+    if (save_unconsumed_input(self, err) < 0) {
+        Py_DECREF(retval);
+        retval = NULL;
+        goto error;
+    }
+
     /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
        various data structures. Note we should only get Z_STREAM_END when
        flushmode is Z_FINISH */
@@ -861,6 +870,7 @@ PyZlib_unflush(compobject *self, PyObject *args)
             goto error;
         }
     }
+
     _PyString_Resize(&retval, self->zst.total_out - start_total_out);
 
 error: