Issue #14684: Add support for predefined compression dictionaries to the zlib module.

Original patch by Sam Rushing.
This commit is contained in:
Nadeem Vawda 2012-06-21 02:13:12 +02:00
parent 50b0a365ba
commit fd8a838d58
4 changed files with 152 additions and 28 deletions

View file

@ -45,6 +45,7 @@ typedef struct
PyObject *unconsumed_tail;
char eof;
int is_initialised;
PyObject *zdict;
#ifdef WITH_THREAD
PyThread_type_lock lock;
#endif
@ -80,14 +81,21 @@ zlib_error(z_stream zst, int err, char *msg)
}
PyDoc_STRVAR(compressobj__doc__,
"compressobj([level]) -- Return a compressor object.\n"
"compressobj([level[, method[, wbits[, memlevel[, strategy[, zdict]]]]]])\n"
" -- Return a compressor object.\n"
"\n"
"Optional arg level is the compression level, in 1-9.");
"Optional arg level is the compression level, in 1-9.\n"
"\n"
"Optional arg zdict is the predefined compression dictionary - a sequence of\n"
"bytes containing subsequences that are likely to occur in the input data.");
PyDoc_STRVAR(decompressobj__doc__,
"decompressobj([wbits]) -- Return a decompressor object.\n"
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
"\n"
"Optional arg wbits is the window buffer size.");
"Optional arg wbits is the window buffer size.\n"
"\n"
"Optional arg zdict is the predefined compression dictionary. This must be\n"
"the same dictionary as used by the compressor that produced the input data.");
static compobject *
newcompobject(PyTypeObject *type)
@ -98,6 +106,7 @@ newcompobject(PyTypeObject *type)
return NULL;
self->eof = 0;
self->is_initialised = 0;
self->zdict = NULL;
self->unused_data = PyBytes_FromStringAndSize("", 0);
if (self->unused_data == NULL) {
Py_DECREF(self);
@ -316,19 +325,24 @@ PyZlib_decompress(PyObject *self, PyObject *args)
}
static PyObject *
PyZlib_compressobj(PyObject *selfptr, PyObject *args)
PyZlib_compressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
{
compobject *self;
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
Py_buffer zdict;
static char *kwlist[] = {"level", "method", "wbits",
"memLevel", "strategy", "zdict", NULL};
if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
&memLevel, &strategy))
zdict.buf = NULL; /* Sentinel, so we can tell whether zdict was supplied. */
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iiiiiy*:compressobj",
kwlist, &level, &method, &wbits,
&memLevel, &strategy, &zdict))
return NULL;
self = newcompobject(&Comptype);
if (self==NULL)
return(NULL);
goto error;
self->zst.zalloc = (alloc_func)NULL;
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
@ -337,30 +351,58 @@ PyZlib_compressobj(PyObject *selfptr, PyObject *args)
switch(err) {
case (Z_OK):
self->is_initialised = 1;
return (PyObject*)self;
if (zdict.buf == NULL) {
goto success;
} else {
err = deflateSetDictionary(&self->zst, zdict.buf, zdict.len);
switch (err) {
case (Z_OK):
goto success;
case (Z_STREAM_ERROR):
PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
goto error;
default:
PyErr_SetString(PyExc_ValueError, "deflateSetDictionary()");
goto error;
}
}
case (Z_MEM_ERROR):
Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for compression object");
return NULL;
goto error;
case(Z_STREAM_ERROR):
Py_DECREF(self);
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
return NULL;
goto error;
default:
zlib_error(self->zst, err, "while creating compression object");
Py_DECREF(self);
return NULL;
goto error;
}
error:
Py_XDECREF(self);
self = NULL;
success:
if (zdict.buf != NULL)
PyBuffer_Release(&zdict);
return (PyObject*)self;
}
static PyObject *
PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
PyZlib_decompressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = {"wbits", "zdict", NULL};
int wbits=DEF_WBITS, err;
compobject *self;
if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits))
PyObject *zdict=NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO:decompressobj",
kwlist, &wbits, &zdict))
return NULL;
if (zdict != NULL && !PyObject_CheckBuffer(zdict)) {
PyErr_SetString(PyExc_TypeError,
"zdict argument must support the buffer protocol");
return NULL;
}
self = newcompobject(&Decomptype);
if (self == NULL)
@ -369,6 +411,10 @@ PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
self->zst.avail_in = 0;
if (zdict != NULL) {
Py_INCREF(zdict);
self->zdict = zdict;
}
err = inflateInit2(&self->zst, wbits);
switch(err) {
case (Z_OK):
@ -398,6 +444,7 @@ Dealloc(compobject *self)
#endif
Py_XDECREF(self->unused_data);
Py_XDECREF(self->unconsumed_tail);
Py_XDECREF(self->zdict);
PyObject_Del(self);
}
@ -557,6 +604,27 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
if (err == Z_NEED_DICT && self->zdict != NULL) {
Py_buffer zdict_buf;
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
}
err = inflateSetDictionary(&(self->zst), zdict_buf.buf, zdict_buf.len);
PyBuffer_Release(&zdict_buf);
if (err != Z_OK) {
zlib_error(self->zst, err, "while decompressing data");
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
}
/* repeat the call to inflate! */
Py_BEGIN_ALLOW_THREADS
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
}
/* While Z_OK and the output buffer is full, there might be more output.
So extend the output buffer and try again.
*/
@ -770,10 +838,13 @@ PyZlib_copy(compobject *self)
}
Py_INCREF(self->unused_data);
Py_INCREF(self->unconsumed_tail);
Py_XINCREF(self->zdict);
Py_XDECREF(retval->unused_data);
Py_XDECREF(retval->unconsumed_tail);
Py_XDECREF(retval->zdict);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
retval->zdict = self->zdict;
retval->eof = self->eof;
/* Mark it as being initialized */
@ -822,10 +893,13 @@ PyZlib_uncopy(compobject *self)
Py_INCREF(self->unused_data);
Py_INCREF(self->unconsumed_tail);
Py_XINCREF(self->zdict);
Py_XDECREF(retval->unused_data);
Py_XDECREF(retval->unconsumed_tail);
Py_XDECREF(retval->zdict);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
retval->zdict = self->zdict;
retval->eof = self->eof;
/* Mark it as being initialized */
@ -1032,13 +1106,13 @@ static PyMethodDef zlib_methods[] =
adler32__doc__},
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
compress__doc__},
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS,
{"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS|METH_KEYWORDS,
compressobj__doc__},
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
crc32__doc__},
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
decompress__doc__},
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS,
{"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS|METH_KEYWORDS,
decompressobj__doc__},
{NULL, NULL}
};
@ -1112,10 +1186,10 @@ PyDoc_STRVAR(zlib_module_documentation,
"\n"
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
"compressobj([level]) -- Return a compressor object.\n"
"compressobj([level[, ...]]) -- Return a compressor object.\n"
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
"decompressobj([wbits]) -- Return a decompressor object.\n"
"decompressobj([wbits[, zdict]]]) -- Return a decompressor object.\n"
"\n"
"'wbits' is window buffer size.\n"
"Compressor objects support compress() and flush() methods; decompressor\n"