mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
Issue #14684: Add support for predefined compression dictionaries to the zlib module.
Original patch by Sam Rushing.
This commit is contained in:
parent
50b0a365ba
commit
fd8a838d58
4 changed files with 152 additions and 28 deletions
|
|
@ -58,12 +58,19 @@ The available exception and functions in this module are:
|
|||
exception if any error occurs.
|
||||
|
||||
|
||||
.. function:: compressobj([level])
|
||||
.. function:: compressobj([level[, method[, wbits[, memlevel[, strategy[, zdict]]]]]])
|
||||
|
||||
Returns a compression object, to be used for compressing data streams that won't
|
||||
fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling
|
||||
the level of compression; ``1`` is fastest and produces the least compression,
|
||||
``9`` is slowest and produces the most. The default value is ``6``.
|
||||
fit into memory at once.
|
||||
|
||||
*level* is an integer from ``1`` to ``9`` controlling the level of
|
||||
compression; ``1`` is fastest and produces the least compression, ``9`` is
|
||||
slowest and produces the most. The default value is ``6``.
|
||||
|
||||
*zdict* is a predefined compression dictionary. This is a sequence of bytes
|
||||
(such as a :class:`bytes` object) containing subsequences that are expected
|
||||
to occur frequently in the data that is to be compressed. Those subsequences
|
||||
that are expected to be most common should come at the end of the dictionary.
|
||||
|
||||
|
||||
.. function:: crc32(data[, value])
|
||||
|
|
@ -114,11 +121,21 @@ The available exception and functions in this module are:
|
|||
to :c:func:`malloc`. The default size is 16384.
|
||||
|
||||
|
||||
.. function:: decompressobj([wbits])
|
||||
.. function:: decompressobj([wbits[, zdict]])
|
||||
|
||||
Returns a decompression object, to be used for decompressing data streams that
|
||||
won't fit into memory at once. The *wbits* parameter controls the size of the
|
||||
window buffer.
|
||||
won't fit into memory at once.
|
||||
|
||||
The *wbits* parameter controls the size of the window buffer.
|
||||
|
||||
The *zdict* parameter specifies a predefined compression dictionary. If
|
||||
provided, this must be the same dictionary as was used by the compressor that
|
||||
produced the data that is to be decompressed.
|
||||
|
||||
.. note::
|
||||
If *zdict* is a mutable object (such as a :class:`bytearray`), you must not
|
||||
modify its contents between the call to :func:`decompressobj` and the first
|
||||
call to the decompressor's ``decompress()`` method.
|
||||
|
||||
|
||||
Compression objects support the following methods:
|
||||
|
|
|
|||
|
|
@ -425,6 +425,36 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
|
|||
dco = zlib.decompressobj()
|
||||
self.assertEqual(dco.flush(), b"") # Returns nothing
|
||||
|
||||
def test_dictionary(self):
|
||||
h = HAMLET_SCENE
|
||||
# build a simulated dictionary out of the words in HAMLET
|
||||
words = h.split()
|
||||
random.shuffle(words)
|
||||
zdict = b''.join(words)
|
||||
# use it to compress HAMLET
|
||||
co = zlib.compressobj(zdict=zdict)
|
||||
cd = co.compress(h) + co.flush()
|
||||
# verify that it will decompress with the dictionary
|
||||
dco = zlib.decompressobj(zdict=zdict)
|
||||
self.assertEqual(dco.decompress(cd) + dco.flush(), h)
|
||||
# verify that it fails when not given the dictionary
|
||||
dco = zlib.decompressobj()
|
||||
self.assertRaises(zlib.error, dco.decompress, cd)
|
||||
|
||||
def test_dictionary_streaming(self):
|
||||
# this is simulating the needs of SPDY to be able to reuse the same
|
||||
# stream object (with its compression state) between sets of compressed
|
||||
# headers.
|
||||
co = zlib.compressobj(zdict=HAMLET_SCENE)
|
||||
do = zlib.decompressobj(zdict=HAMLET_SCENE)
|
||||
piece = HAMLET_SCENE[1000:1500]
|
||||
d0 = co.compress(piece) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||
d1 = co.compress(piece[100:]) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||
d2 = co.compress(piece[:-100]) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||
self.assertEqual(do.decompress(d0), piece)
|
||||
self.assertEqual(do.decompress(d1), piece[100:])
|
||||
self.assertEqual(do.decompress(d2), piece[:-100])
|
||||
|
||||
def test_decompress_incomplete_stream(self):
|
||||
# This is 'foo', deflated
|
||||
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #14684: zlib.compressobj() and zlib.decompressobj() now support the use
|
||||
of predefined compression dictionaries. Original patch by Sam Rushing.
|
||||
|
||||
- Fix GzipFile's handling of filenames given as bytes objects.
|
||||
|
||||
- Issue #14772: Return destination values from some shutil functions.
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ typedef struct
|
|||
PyObject *unconsumed_tail;
|
||||
char eof;
|
||||
int is_initialised;
|
||||
PyObject *zdict;
|
||||
#ifdef WITH_THREAD
|
||||
PyThread_type_lock lock;
|
||||
#endif
|
||||
|
|
@ -80,14 +81,21 @@ zlib_error(z_stream zst, int err, char *msg)
|
|||
}
|
||||
|
||||
PyDoc_STRVAR(compressobj__doc__,
|
||||
"compressobj([level]) -- Return a compressor object.\n"
|
||||
"compressobj([level[, method[, wbits[, memlevel[, strategy[, zdict]]]]]])\n"
|
||||
" -- Return a compressor object.\n"
|
||||
"\n"
|
||||
"Optional arg level is the compression level, in 1-9.");
|
||||
"Optional arg level is the compression level, in 1-9.\n"
|
||||
"\n"
|
||||
"Optional arg zdict is the predefined compression dictionary - a sequence of\n"
|
||||
"bytes containing subsequences that are likely to occur in the input data.");
|
||||
|
||||
PyDoc_STRVAR(decompressobj__doc__,
|
||||
"decompressobj([wbits]) -- Return a decompressor object.\n"
|
||||
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
|
||||
"\n"
|
||||
"Optional arg wbits is the window buffer size.");
|
||||
"Optional arg wbits is the window buffer size.\n"
|
||||
"\n"
|
||||
"Optional arg zdict is the predefined compression dictionary. This must be\n"
|
||||
"the same dictionary as used by the compressor that produced the input data.");
|
||||
|
||||
static compobject *
|
||||
newcompobject(PyTypeObject *type)
|
||||
|
|
@ -98,6 +106,7 @@ newcompobject(PyTypeObject *type)
|
|||
return NULL;
|
||||
self->eof = 0;
|
||||
self->is_initialised = 0;
|
||||
self->zdict = NULL;
|
||||
self->unused_data = PyBytes_FromStringAndSize("", 0);
|
||||
if (self->unused_data == NULL) {
|
||||
Py_DECREF(self);
|
||||
|
|
@ -316,19 +325,24 @@ PyZlib_decompress(PyObject *self, PyObject *args)
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
PyZlib_compressobj(PyObject *selfptr, PyObject *args)
|
||||
PyZlib_compressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
compobject *self;
|
||||
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
|
||||
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
|
||||
Py_buffer zdict;
|
||||
static char *kwlist[] = {"level", "method", "wbits",
|
||||
"memLevel", "strategy", "zdict", NULL};
|
||||
|
||||
if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
|
||||
&memLevel, &strategy))
|
||||
zdict.buf = NULL; /* Sentinel, so we can tell whether zdict was supplied. */
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iiiiiy*:compressobj",
|
||||
kwlist, &level, &method, &wbits,
|
||||
&memLevel, &strategy, &zdict))
|
||||
return NULL;
|
||||
|
||||
self = newcompobject(&Comptype);
|
||||
if (self==NULL)
|
||||
return(NULL);
|
||||
goto error;
|
||||
self->zst.zalloc = (alloc_func)NULL;
|
||||
self->zst.zfree = (free_func)Z_NULL;
|
||||
self->zst.next_in = NULL;
|
||||
|
|
@ -337,30 +351,58 @@ PyZlib_compressobj(PyObject *selfptr, PyObject *args)
|
|||
switch(err) {
|
||||
case (Z_OK):
|
||||
self->is_initialised = 1;
|
||||
return (PyObject*)self;
|
||||
if (zdict.buf == NULL) {
|
||||
goto success;
|
||||
} else {
|
||||
err = deflateSetDictionary(&self->zst, zdict.buf, zdict.len);
|
||||
switch (err) {
|
||||
case (Z_OK):
|
||||
goto success;
|
||||
case (Z_STREAM_ERROR):
|
||||
PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
|
||||
goto error;
|
||||
default:
|
||||
PyErr_SetString(PyExc_ValueError, "deflateSetDictionary()");
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
case (Z_MEM_ERROR):
|
||||
Py_DECREF(self);
|
||||
PyErr_SetString(PyExc_MemoryError,
|
||||
"Can't allocate memory for compression object");
|
||||
return NULL;
|
||||
goto error;
|
||||
case(Z_STREAM_ERROR):
|
||||
Py_DECREF(self);
|
||||
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
|
||||
return NULL;
|
||||
goto error;
|
||||
default:
|
||||
zlib_error(self->zst, err, "while creating compression object");
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
error:
|
||||
Py_XDECREF(self);
|
||||
self = NULL;
|
||||
success:
|
||||
if (zdict.buf != NULL)
|
||||
PyBuffer_Release(&zdict);
|
||||
return (PyObject*)self;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
|
||||
PyZlib_decompressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
static char *kwlist[] = {"wbits", "zdict", NULL};
|
||||
int wbits=DEF_WBITS, err;
|
||||
compobject *self;
|
||||
if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits))
|
||||
PyObject *zdict=NULL;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO:decompressobj",
|
||||
kwlist, &wbits, &zdict))
|
||||
return NULL;
|
||||
if (zdict != NULL && !PyObject_CheckBuffer(zdict)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"zdict argument must support the buffer protocol");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self = newcompobject(&Decomptype);
|
||||
if (self == NULL)
|
||||
|
|
@ -369,6 +411,10 @@ PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
|
|||
self->zst.zfree = (free_func)Z_NULL;
|
||||
self->zst.next_in = NULL;
|
||||
self->zst.avail_in = 0;
|
||||
if (zdict != NULL) {
|
||||
Py_INCREF(zdict);
|
||||
self->zdict = zdict;
|
||||
}
|
||||
err = inflateInit2(&self->zst, wbits);
|
||||
switch(err) {
|
||||
case (Z_OK):
|
||||
|
|
@ -398,6 +444,7 @@ Dealloc(compobject *self)
|
|||
#endif
|
||||
Py_XDECREF(self->unused_data);
|
||||
Py_XDECREF(self->unconsumed_tail);
|
||||
Py_XDECREF(self->zdict);
|
||||
PyObject_Del(self);
|
||||
}
|
||||
|
||||
|
|
@ -557,6 +604,27 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
|
|||
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||
Py_END_ALLOW_THREADS
|
||||
|
||||
if (err == Z_NEED_DICT && self->zdict != NULL) {
|
||||
Py_buffer zdict_buf;
|
||||
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
|
||||
Py_DECREF(RetVal);
|
||||
RetVal = NULL;
|
||||
goto error;
|
||||
}
|
||||
err = inflateSetDictionary(&(self->zst), zdict_buf.buf, zdict_buf.len);
|
||||
PyBuffer_Release(&zdict_buf);
|
||||
if (err != Z_OK) {
|
||||
zlib_error(self->zst, err, "while decompressing data");
|
||||
Py_DECREF(RetVal);
|
||||
RetVal = NULL;
|
||||
goto error;
|
||||
}
|
||||
/* repeat the call to inflate! */
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||
Py_END_ALLOW_THREADS
|
||||
}
|
||||
|
||||
/* While Z_OK and the output buffer is full, there might be more output.
|
||||
So extend the output buffer and try again.
|
||||
*/
|
||||
|
|
@ -770,10 +838,13 @@ PyZlib_copy(compobject *self)
|
|||
}
|
||||
Py_INCREF(self->unused_data);
|
||||
Py_INCREF(self->unconsumed_tail);
|
||||
Py_XINCREF(self->zdict);
|
||||
Py_XDECREF(retval->unused_data);
|
||||
Py_XDECREF(retval->unconsumed_tail);
|
||||
Py_XDECREF(retval->zdict);
|
||||
retval->unused_data = self->unused_data;
|
||||
retval->unconsumed_tail = self->unconsumed_tail;
|
||||
retval->zdict = self->zdict;
|
||||
retval->eof = self->eof;
|
||||
|
||||
/* Mark it as being initialized */
|
||||
|
|
@ -822,10 +893,13 @@ PyZlib_uncopy(compobject *self)
|
|||
|
||||
Py_INCREF(self->unused_data);
|
||||
Py_INCREF(self->unconsumed_tail);
|
||||
Py_XINCREF(self->zdict);
|
||||
Py_XDECREF(retval->unused_data);
|
||||
Py_XDECREF(retval->unconsumed_tail);
|
||||
Py_XDECREF(retval->zdict);
|
||||
retval->unused_data = self->unused_data;
|
||||
retval->unconsumed_tail = self->unconsumed_tail;
|
||||
retval->zdict = self->zdict;
|
||||
retval->eof = self->eof;
|
||||
|
||||
/* Mark it as being initialized */
|
||||
|
|
@ -1032,13 +1106,13 @@ static PyMethodDef zlib_methods[] =
|
|||
adler32__doc__},
|
||||
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
|
||||
compress__doc__},
|
||||
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS,
|
||||
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS|METH_KEYWORDS,
|
||||
compressobj__doc__},
|
||||
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
|
||||
crc32__doc__},
|
||||
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
|
||||
decompress__doc__},
|
||||
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS,
|
||||
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS|METH_KEYWORDS,
|
||||
decompressobj__doc__},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
|
@ -1112,10 +1186,10 @@ PyDoc_STRVAR(zlib_module_documentation,
|
|||
"\n"
|
||||
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
|
||||
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
|
||||
"compressobj([level]) -- Return a compressor object.\n"
|
||||
"compressobj([level[, ...]]) -- Return a compressor object.\n"
|
||||
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
|
||||
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
|
||||
"decompressobj([wbits]) -- Return a decompressor object.\n"
|
||||
"decompressobj([wbits[, zdict]]]) -- Return a decompressor object.\n"
|
||||
"\n"
|
||||
"'wbits' is window buffer size.\n"
|
||||
"Compressor objects support compress() and flush() methods; decompressor\n"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue