mirror of
https://github.com/python/cpython.git
synced 2025-12-05 00:52:25 +00:00
Issue #14684: Add support for predefined compression dictionaries to the zlib module.
Original patch by Sam Rushing.
This commit is contained in:
parent
50b0a365ba
commit
fd8a838d58
4 changed files with 152 additions and 28 deletions
|
|
@ -58,12 +58,19 @@ The available exception and functions in this module are:
|
||||||
exception if any error occurs.
|
exception if any error occurs.
|
||||||
|
|
||||||
|
|
||||||
.. function:: compressobj([level])
|
.. function:: compressobj([level[, method[, wbits[, memLevel[, strategy[, zdict]]]]]])
|
||||||
|
|
||||||
Returns a compression object, to be used for compressing data streams that won't
|
Returns a compression object, to be used for compressing data streams that won't
|
||||||
fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling
|
fit into memory at once.
|
||||||
the level of compression; ``1`` is fastest and produces the least compression,
|
|
||||||
``9`` is slowest and produces the most. The default value is ``6``.
|
*level* is an integer from ``1`` to ``9`` controlling the level of
|
||||||
|
compression; ``1`` is fastest and produces the least compression, ``9`` is
|
||||||
|
slowest and produces the most. The default value is ``6``.
|
||||||
|
|
||||||
|
*zdict* is a predefined compression dictionary. This is a sequence of bytes
|
||||||
|
(such as a :class:`bytes` object) containing subsequences that are expected
|
||||||
|
to occur frequently in the data that is to be compressed. Those subsequences
|
||||||
|
that are expected to be most common should come at the end of the dictionary.
|
||||||
|
|
||||||
|
|
||||||
.. function:: crc32(data[, value])
|
.. function:: crc32(data[, value])
|
||||||
|
|
@ -114,11 +121,21 @@ The available exception and functions in this module are:
|
||||||
to :c:func:`malloc`. The default size is 16384.
|
to :c:func:`malloc`. The default size is 16384.
|
||||||
|
|
||||||
|
|
||||||
.. function:: decompressobj([wbits])
|
.. function:: decompressobj([wbits[, zdict]])
|
||||||
|
|
||||||
Returns a decompression object, to be used for decompressing data streams that
|
Returns a decompression object, to be used for decompressing data streams that
|
||||||
won't fit into memory at once. The *wbits* parameter controls the size of the
|
won't fit into memory at once.
|
||||||
window buffer.
|
|
||||||
|
The *wbits* parameter controls the size of the window buffer.
|
||||||
|
|
||||||
|
The *zdict* parameter specifies a predefined compression dictionary. If
|
||||||
|
provided, this must be the same dictionary as was used by the compressor that
|
||||||
|
produced the data that is to be decompressed.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
If *zdict* is a mutable object (such as a :class:`bytearray`), you must not
|
||||||
|
modify its contents between the call to :func:`decompressobj` and the first
|
||||||
|
call to the decompressor's ``decompress()`` method.
|
||||||
|
|
||||||
|
|
||||||
Compression objects support the following methods:
|
Compression objects support the following methods:
|
||||||
|
|
|
||||||
|
|
@ -425,6 +425,36 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
|
||||||
dco = zlib.decompressobj()
|
dco = zlib.decompressobj()
|
||||||
self.assertEqual(dco.flush(), b"") # Returns nothing
|
self.assertEqual(dco.flush(), b"") # Returns nothing
|
||||||
|
|
||||||
|
def test_dictionary(self):
|
||||||
|
h = HAMLET_SCENE
|
||||||
|
# build a simulated dictionary out of the words in HAMLET
|
||||||
|
words = h.split()
|
||||||
|
random.shuffle(words)
|
||||||
|
zdict = b''.join(words)
|
||||||
|
# use it to compress HAMLET
|
||||||
|
co = zlib.compressobj(zdict=zdict)
|
||||||
|
cd = co.compress(h) + co.flush()
|
||||||
|
# verify that it will decompress with the dictionary
|
||||||
|
dco = zlib.decompressobj(zdict=zdict)
|
||||||
|
self.assertEqual(dco.decompress(cd) + dco.flush(), h)
|
||||||
|
# verify that it fails when not given the dictionary
|
||||||
|
dco = zlib.decompressobj()
|
||||||
|
self.assertRaises(zlib.error, dco.decompress, cd)
|
||||||
|
|
||||||
|
def test_dictionary_streaming(self):
|
||||||
|
# this is simulating the needs of SPDY to be able to reuse the same
|
||||||
|
# stream object (with its compression state) between sets of compressed
|
||||||
|
# headers.
|
||||||
|
co = zlib.compressobj(zdict=HAMLET_SCENE)
|
||||||
|
do = zlib.decompressobj(zdict=HAMLET_SCENE)
|
||||||
|
piece = HAMLET_SCENE[1000:1500]
|
||||||
|
d0 = co.compress(piece) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||||
|
d1 = co.compress(piece[100:]) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||||
|
d2 = co.compress(piece[:-100]) + co.flush(zlib.Z_SYNC_FLUSH)
|
||||||
|
self.assertEqual(do.decompress(d0), piece)
|
||||||
|
self.assertEqual(do.decompress(d1), piece[100:])
|
||||||
|
self.assertEqual(do.decompress(d2), piece[:-100])
|
||||||
|
|
||||||
def test_decompress_incomplete_stream(self):
|
def test_decompress_incomplete_stream(self):
|
||||||
# This is 'foo', deflated
|
# This is 'foo', deflated
|
||||||
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'
|
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #14684: zlib.compressobj() and zlib.decompressobj() now support the use
|
||||||
|
of predefined compression dictionaries. Original patch by Sam Rushing.
|
||||||
|
|
||||||
- Fix GzipFile's handling of filenames given as bytes objects.
|
- Fix GzipFile's handling of filenames given as bytes objects.
|
||||||
|
|
||||||
- Issue #14772: Return destination values from some shutil functions.
|
- Issue #14772: Return destination values from some shutil functions.
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ typedef struct
|
||||||
PyObject *unconsumed_tail;
|
PyObject *unconsumed_tail;
|
||||||
char eof;
|
char eof;
|
||||||
int is_initialised;
|
int is_initialised;
|
||||||
|
PyObject *zdict;
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
PyThread_type_lock lock;
|
PyThread_type_lock lock;
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -80,14 +81,21 @@ zlib_error(z_stream zst, int err, char *msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(compressobj__doc__,
|
PyDoc_STRVAR(compressobj__doc__,
|
||||||
"compressobj([level]) -- Return a compressor object.\n"
|
"compressobj([level[, method[, wbits[, memLevel[, strategy[, zdict]]]]]])\n"
|
||||||
|
" -- Return a compressor object.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Optional arg level is the compression level, in 1-9.");
|
"Optional arg level is the compression level, in 1-9.\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arg zdict is the predefined compression dictionary - a sequence of\n"
|
||||||
|
"bytes containing subsequences that are likely to occur in the input data.");
|
||||||
|
|
||||||
PyDoc_STRVAR(decompressobj__doc__,
|
PyDoc_STRVAR(decompressobj__doc__,
|
||||||
"decompressobj([wbits]) -- Return a decompressor object.\n"
|
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Optional arg wbits is the window buffer size.");
|
"Optional arg wbits is the window buffer size.\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arg zdict is the predefined compression dictionary. This must be\n"
|
||||||
|
"the same dictionary as used by the compressor that produced the input data.");
|
||||||
|
|
||||||
static compobject *
|
static compobject *
|
||||||
newcompobject(PyTypeObject *type)
|
newcompobject(PyTypeObject *type)
|
||||||
|
|
@ -98,6 +106,7 @@ newcompobject(PyTypeObject *type)
|
||||||
return NULL;
|
return NULL;
|
||||||
self->eof = 0;
|
self->eof = 0;
|
||||||
self->is_initialised = 0;
|
self->is_initialised = 0;
|
||||||
|
self->zdict = NULL;
|
||||||
self->unused_data = PyBytes_FromStringAndSize("", 0);
|
self->unused_data = PyBytes_FromStringAndSize("", 0);
|
||||||
if (self->unused_data == NULL) {
|
if (self->unused_data == NULL) {
|
||||||
Py_DECREF(self);
|
Py_DECREF(self);
|
||||||
|
|
@ -316,19 +325,24 @@ PyZlib_decompress(PyObject *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
PyZlib_compressobj(PyObject *selfptr, PyObject *args)
|
PyZlib_compressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
compobject *self;
|
compobject *self;
|
||||||
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
|
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
|
||||||
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
|
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
|
||||||
|
Py_buffer zdict;
|
||||||
|
static char *kwlist[] = {"level", "method", "wbits",
|
||||||
|
"memLevel", "strategy", "zdict", NULL};
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
|
zdict.buf = NULL; /* Sentinel, so we can tell whether zdict was supplied. */
|
||||||
&memLevel, &strategy))
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iiiiiy*:compressobj",
|
||||||
|
kwlist, &level, &method, &wbits,
|
||||||
|
&memLevel, &strategy, &zdict))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
self = newcompobject(&Comptype);
|
self = newcompobject(&Comptype);
|
||||||
if (self==NULL)
|
if (self==NULL)
|
||||||
return(NULL);
|
goto error;
|
||||||
self->zst.zalloc = (alloc_func)NULL;
|
self->zst.zalloc = (alloc_func)NULL;
|
||||||
self->zst.zfree = (free_func)Z_NULL;
|
self->zst.zfree = (free_func)Z_NULL;
|
||||||
self->zst.next_in = NULL;
|
self->zst.next_in = NULL;
|
||||||
|
|
@ -337,30 +351,58 @@ PyZlib_compressobj(PyObject *selfptr, PyObject *args)
|
||||||
switch(err) {
|
switch(err) {
|
||||||
case (Z_OK):
|
case (Z_OK):
|
||||||
self->is_initialised = 1;
|
self->is_initialised = 1;
|
||||||
return (PyObject*)self;
|
if (zdict.buf == NULL) {
|
||||||
|
goto success;
|
||||||
|
} else {
|
||||||
|
err = deflateSetDictionary(&self->zst, zdict.buf, zdict.len);
|
||||||
|
switch (err) {
|
||||||
|
case (Z_OK):
|
||||||
|
goto success;
|
||||||
|
case (Z_STREAM_ERROR):
|
||||||
|
PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
|
||||||
|
goto error;
|
||||||
|
default:
|
||||||
|
PyErr_SetString(PyExc_ValueError, "deflateSetDictionary()");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
}
|
||||||
case (Z_MEM_ERROR):
|
case (Z_MEM_ERROR):
|
||||||
Py_DECREF(self);
|
|
||||||
PyErr_SetString(PyExc_MemoryError,
|
PyErr_SetString(PyExc_MemoryError,
|
||||||
"Can't allocate memory for compression object");
|
"Can't allocate memory for compression object");
|
||||||
return NULL;
|
goto error;
|
||||||
case(Z_STREAM_ERROR):
|
case(Z_STREAM_ERROR):
|
||||||
Py_DECREF(self);
|
|
||||||
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
|
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
|
||||||
return NULL;
|
goto error;
|
||||||
default:
|
default:
|
||||||
zlib_error(self->zst, err, "while creating compression object");
|
zlib_error(self->zst, err, "while creating compression object");
|
||||||
Py_DECREF(self);
|
goto error;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_XDECREF(self);
|
||||||
|
self = NULL;
|
||||||
|
success:
|
||||||
|
if (zdict.buf != NULL)
|
||||||
|
PyBuffer_Release(&zdict);
|
||||||
|
return (PyObject*)self;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
|
PyZlib_decompressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = {"wbits", "zdict", NULL};
|
||||||
int wbits=DEF_WBITS, err;
|
int wbits=DEF_WBITS, err;
|
||||||
compobject *self;
|
compobject *self;
|
||||||
if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits))
|
PyObject *zdict=NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO:decompressobj",
|
||||||
|
kwlist, &wbits, &zdict))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
if (zdict != NULL && !PyObject_CheckBuffer(zdict)) {
|
||||||
|
PyErr_SetString(PyExc_TypeError,
|
||||||
|
"zdict argument must support the buffer protocol");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
self = newcompobject(&Decomptype);
|
self = newcompobject(&Decomptype);
|
||||||
if (self == NULL)
|
if (self == NULL)
|
||||||
|
|
@ -369,6 +411,10 @@ PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
|
||||||
self->zst.zfree = (free_func)Z_NULL;
|
self->zst.zfree = (free_func)Z_NULL;
|
||||||
self->zst.next_in = NULL;
|
self->zst.next_in = NULL;
|
||||||
self->zst.avail_in = 0;
|
self->zst.avail_in = 0;
|
||||||
|
if (zdict != NULL) {
|
||||||
|
Py_INCREF(zdict);
|
||||||
|
self->zdict = zdict;
|
||||||
|
}
|
||||||
err = inflateInit2(&self->zst, wbits);
|
err = inflateInit2(&self->zst, wbits);
|
||||||
switch(err) {
|
switch(err) {
|
||||||
case (Z_OK):
|
case (Z_OK):
|
||||||
|
|
@ -398,6 +444,7 @@ Dealloc(compobject *self)
|
||||||
#endif
|
#endif
|
||||||
Py_XDECREF(self->unused_data);
|
Py_XDECREF(self->unused_data);
|
||||||
Py_XDECREF(self->unconsumed_tail);
|
Py_XDECREF(self->unconsumed_tail);
|
||||||
|
Py_XDECREF(self->zdict);
|
||||||
PyObject_Del(self);
|
PyObject_Del(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -557,6 +604,27 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
|
||||||
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||||
Py_END_ALLOW_THREADS
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
|
if (err == Z_NEED_DICT && self->zdict != NULL) {
|
||||||
|
Py_buffer zdict_buf;
|
||||||
|
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
|
||||||
|
Py_DECREF(RetVal);
|
||||||
|
RetVal = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
err = inflateSetDictionary(&(self->zst), zdict_buf.buf, zdict_buf.len);
|
||||||
|
PyBuffer_Release(&zdict_buf);
|
||||||
|
if (err != Z_OK) {
|
||||||
|
zlib_error(self->zst, err, "while decompressing data");
|
||||||
|
Py_DECREF(RetVal);
|
||||||
|
RetVal = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
/* repeat the call to inflate! */
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
}
|
||||||
|
|
||||||
/* While Z_OK and the output buffer is full, there might be more output.
|
/* While Z_OK and the output buffer is full, there might be more output.
|
||||||
So extend the output buffer and try again.
|
So extend the output buffer and try again.
|
||||||
*/
|
*/
|
||||||
|
|
@ -770,10 +838,13 @@ PyZlib_copy(compobject *self)
|
||||||
}
|
}
|
||||||
Py_INCREF(self->unused_data);
|
Py_INCREF(self->unused_data);
|
||||||
Py_INCREF(self->unconsumed_tail);
|
Py_INCREF(self->unconsumed_tail);
|
||||||
|
Py_XINCREF(self->zdict);
|
||||||
Py_XDECREF(retval->unused_data);
|
Py_XDECREF(retval->unused_data);
|
||||||
Py_XDECREF(retval->unconsumed_tail);
|
Py_XDECREF(retval->unconsumed_tail);
|
||||||
|
Py_XDECREF(retval->zdict);
|
||||||
retval->unused_data = self->unused_data;
|
retval->unused_data = self->unused_data;
|
||||||
retval->unconsumed_tail = self->unconsumed_tail;
|
retval->unconsumed_tail = self->unconsumed_tail;
|
||||||
|
retval->zdict = self->zdict;
|
||||||
retval->eof = self->eof;
|
retval->eof = self->eof;
|
||||||
|
|
||||||
/* Mark it as being initialized */
|
/* Mark it as being initialized */
|
||||||
|
|
@ -822,10 +893,13 @@ PyZlib_uncopy(compobject *self)
|
||||||
|
|
||||||
Py_INCREF(self->unused_data);
|
Py_INCREF(self->unused_data);
|
||||||
Py_INCREF(self->unconsumed_tail);
|
Py_INCREF(self->unconsumed_tail);
|
||||||
|
Py_XINCREF(self->zdict);
|
||||||
Py_XDECREF(retval->unused_data);
|
Py_XDECREF(retval->unused_data);
|
||||||
Py_XDECREF(retval->unconsumed_tail);
|
Py_XDECREF(retval->unconsumed_tail);
|
||||||
|
Py_XDECREF(retval->zdict);
|
||||||
retval->unused_data = self->unused_data;
|
retval->unused_data = self->unused_data;
|
||||||
retval->unconsumed_tail = self->unconsumed_tail;
|
retval->unconsumed_tail = self->unconsumed_tail;
|
||||||
|
retval->zdict = self->zdict;
|
||||||
retval->eof = self->eof;
|
retval->eof = self->eof;
|
||||||
|
|
||||||
/* Mark it as being initialized */
|
/* Mark it as being initialized */
|
||||||
|
|
@ -1032,13 +1106,13 @@ static PyMethodDef zlib_methods[] =
|
||||||
adler32__doc__},
|
adler32__doc__},
|
||||||
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
|
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
|
||||||
compress__doc__},
|
compress__doc__},
|
||||||
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS,
|
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS|METH_KEYWORDS,
|
||||||
compressobj__doc__},
|
compressobj__doc__},
|
||||||
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
|
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
|
||||||
crc32__doc__},
|
crc32__doc__},
|
||||||
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
|
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
|
||||||
decompress__doc__},
|
decompress__doc__},
|
||||||
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS,
|
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS|METH_KEYWORDS,
|
||||||
decompressobj__doc__},
|
decompressobj__doc__},
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
@ -1112,10 +1186,10 @@ PyDoc_STRVAR(zlib_module_documentation,
|
||||||
"\n"
|
"\n"
|
||||||
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
|
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
|
||||||
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
|
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
|
||||||
"compressobj([level]) -- Return a compressor object.\n"
|
"compressobj([level[, ...]]) -- Return a compressor object.\n"
|
||||||
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
|
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
|
||||||
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
|
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
|
||||||
"decompressobj([wbits]) -- Return a decompressor object.\n"
|
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"'wbits' is window buffer size.\n"
|
"'wbits' is window buffer size.\n"
|
||||||
"Compressor objects support compress() and flush() methods; decompressor\n"
|
"Compressor objects support compress() and flush() methods; decompressor\n"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue