mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00

* Add _zstd module for https://peps.python.org/pep-0784/ This commit introduces the `_zstd` module, with bindings to libzstd from the pyzstd project. It also includes the unix build system configuration. Windows build system support will be integrated independently as it depends on integration with cpython-source-deps. * Add _zstd to modules * Fix path for compression.zstd module * Ignore _zstd module like _io * Expand module state macros to improve code quality Also removes module state references from the classes in the _zstd module and instead uses PyType_GetModuleState() * Remove backticks suggested in review Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> * Use critical sections to lock object state This should avoid races and deadlocks. * Remove compress/decompress and mark module as not reliant on the GIL The `compress`/`decompress` functions will be moved to Python code for simplicity. C implementations can always be re-added in the future. Also, mark _zstd as not requiring the GIL. * Lift critical section to avoid clang warning * Respond to comments by picnixz * Call out pyzstd explicitly in license description Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> * Use a much more robust implementation... ... 
for `get_zstd_state_from_type` Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Use PyList_GetItemRef for thread safety purposes * Use a macro for the minimum supported version * remove const from primivite types * Use PyMem_New in another spot * Simplify error handling in _get_frame_size * Another simplification of error handling in get_frame_info * Rename _module_state to mod_state * Rewrite comment explaining the context of the code * Add link to pyzstd * Add TODO about refactoring dict training code * Use PyModule_AddObjectRef over PyModule_AddObject PyModule_AddObject is soft-deprecated, so we should use PyModule_AddObjectRef * Check result of OutputBufferGrow * Simplify return logic in `add_constant_to_type` Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Ignore return value of _zstd_clear() Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Remove redundant comments * Remove __reduce__ from ZstdDict We should instead document that to pickle a dictionary a user should use the `.dict_content` attribute. 
* Use PyUnicode_FromFormat instead of a buffer * Don't use C constants/types in error messages * Make error messages easier to understand for Python users * Lower minimum required version 1.4.0 * Use casts and make slot function signatures correct * Be consistent with CPython on const usage * Make else clauses in line with PEP 7 * Fix over-indented blocks in argument clinic * Add critical section around ZSTD_DCtx_setParameter * Add a TODO about refactoring critical sections * Use Py_UNREACHABLE * Move bytes operations out of Py_BEGIN_ALLOW_THREADS * Add TODO about ensuring a lock is held * Remove asserts that may not be correct * Add TODO to make ZstdDict and others GC objects * Make objects GC tracked * Remove unused include * Fix some memory issues * Fix refleaks on module and in ZstdDict * Update configure to check for ZDICT_finalizeDictionary * Properly check version in configure * exit(1) if check fails * Use AC_RUN_IFELSE * Use a define() to re-use version check * Actually properly set _zstd module status based on version --------- Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
891 lines
26 KiB
C
891 lines
26 KiB
C
/*
|
|
Low level interface to Meta's zstd library for use in the compression.zstd
|
|
Python module.
|
|
*/
|
|
|
|
/* ZstdDecompressor class definition */
|
|
|
|
/*[clinic input]
|
|
module _zstd
|
|
class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type"
|
|
[clinic start generated code]*/
|
|
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/
|
|
|
|
#ifndef Py_BUILD_CORE_BUILTIN
|
|
# define Py_BUILD_CORE_MODULE 1
|
|
#endif
|
|
|
|
#include "_zstdmodule.h"
|
|
|
|
#include "buffer.h"
|
|
|
|
#include <stddef.h> // offsetof()
|
|
|
|
/* Cast a generic object pointer to ZstdDecompressor *.  The argument is
   parenthesized so that an expression argument (e.g. `a + b`) is cast as a
   whole instead of only its first operand. */
#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)(op))
|
|
|
|
static inline ZSTD_DDict *
|
|
_get_DDict(ZstdDict *self)
|
|
{
|
|
ZSTD_DDict *ret;
|
|
|
|
/* Already created */
|
|
if (self->d_dict != NULL) {
|
|
return self->d_dict;
|
|
}
|
|
|
|
Py_BEGIN_CRITICAL_SECTION(self);
|
|
if (self->d_dict == NULL) {
|
|
/* Create ZSTD_DDict instance from dictionary content */
|
|
char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
|
|
Py_ssize_t dict_len = Py_SIZE(self->dict_content);
|
|
Py_BEGIN_ALLOW_THREADS
|
|
self->d_dict = ZSTD_createDDict(dict_buffer,
|
|
dict_len);
|
|
Py_END_ALLOW_THREADS
|
|
|
|
if (self->d_dict == NULL) {
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state != NULL) {
|
|
PyErr_SetString(mod_state->ZstdError,
|
|
"Failed to create ZSTD_DDict instance from zstd "
|
|
"dictionary content. Maybe the content is corrupted.");
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Don't lose any exception */
|
|
ret = self->d_dict;
|
|
Py_END_CRITICAL_SECTION();
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Set decompression parameters to decompression context */
|
|
int
|
|
_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
|
|
{
|
|
size_t zstd_ret;
|
|
PyObject *key, *value;
|
|
Py_ssize_t pos;
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (!PyDict_Check(options)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"options argument should be dict object.");
|
|
return -1;
|
|
}
|
|
|
|
pos = 0;
|
|
while (PyDict_Next(options, &pos, &key, &value)) {
|
|
/* Check key type */
|
|
if (Py_TYPE(key) == mod_state->CParameter_type) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"Key of decompression options dict should "
|
|
"NOT be CParameter.");
|
|
return -1;
|
|
}
|
|
|
|
/* Both key & value should be 32-bit signed int */
|
|
int key_v = PyLong_AsInt(key);
|
|
if (key_v == -1 && PyErr_Occurred()) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Key of options dict should be a DParameter attribute.");
|
|
return -1;
|
|
}
|
|
|
|
// TODO(emmatyping): check bounds when there is a value error here for better
|
|
// error message?
|
|
int value_v = PyLong_AsInt(value);
|
|
if (value_v == -1 && PyErr_Occurred()) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"Value of options dict should be an int.");
|
|
return -1;
|
|
}
|
|
|
|
/* Set parameter to compression context */
|
|
Py_BEGIN_CRITICAL_SECTION(self);
|
|
zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
|
|
Py_END_CRITICAL_SECTION();
|
|
|
|
/* Check error */
|
|
if (ZSTD_isError(zstd_ret)) {
|
|
set_parameter_error(mod_state, 0, key_v, value_v);
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Load dictionary or prefix to decompression context */
|
|
int
|
|
_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
|
|
{
|
|
size_t zstd_ret;
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state == NULL) {
|
|
return -1;
|
|
}
|
|
ZstdDict *zd;
|
|
int type, ret;
|
|
|
|
/* Check ZstdDict */
|
|
ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type);
|
|
if (ret < 0) {
|
|
return -1;
|
|
}
|
|
else if (ret > 0) {
|
|
/* When decompressing, use digested dictionary by default. */
|
|
zd = (ZstdDict*)dict;
|
|
type = DICT_TYPE_DIGESTED;
|
|
goto load;
|
|
}
|
|
|
|
/* Check (ZstdDict, type) */
|
|
if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) {
|
|
/* Check ZstdDict */
|
|
ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0),
|
|
(PyObject*)mod_state->ZstdDict_type);
|
|
if (ret < 0) {
|
|
return -1;
|
|
}
|
|
else if (ret > 0) {
|
|
/* type == -1 may indicate an error. */
|
|
type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
|
|
if (type == DICT_TYPE_DIGESTED ||
|
|
type == DICT_TYPE_UNDIGESTED ||
|
|
type == DICT_TYPE_PREFIX)
|
|
{
|
|
assert(type >= 0);
|
|
zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0);
|
|
goto load;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Wrong type */
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"zstd_dict argument should be ZstdDict object.");
|
|
return -1;
|
|
|
|
load:
|
|
if (type == DICT_TYPE_DIGESTED) {
|
|
/* Get ZSTD_DDict */
|
|
ZSTD_DDict *d_dict = _get_DDict(zd);
|
|
if (d_dict == NULL) {
|
|
return -1;
|
|
}
|
|
/* Reference a prepared dictionary */
|
|
Py_BEGIN_CRITICAL_SECTION(self);
|
|
zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict);
|
|
Py_END_CRITICAL_SECTION();
|
|
}
|
|
else if (type == DICT_TYPE_UNDIGESTED) {
|
|
/* Load a dictionary */
|
|
Py_BEGIN_CRITICAL_SECTION2(self, zd);
|
|
zstd_ret = ZSTD_DCtx_loadDictionary(
|
|
self->dctx,
|
|
PyBytes_AS_STRING(zd->dict_content),
|
|
Py_SIZE(zd->dict_content));
|
|
Py_END_CRITICAL_SECTION2();
|
|
}
|
|
else if (type == DICT_TYPE_PREFIX) {
|
|
/* Load a prefix */
|
|
Py_BEGIN_CRITICAL_SECTION2(self, zd);
|
|
zstd_ret = ZSTD_DCtx_refPrefix(
|
|
self->dctx,
|
|
PyBytes_AS_STRING(zd->dict_content),
|
|
Py_SIZE(zd->dict_content));
|
|
Py_END_CRITICAL_SECTION2();
|
|
}
|
|
else {
|
|
/* Impossible code path */
|
|
PyErr_SetString(PyExc_SystemError,
|
|
"load_d_dict() impossible code path");
|
|
return -1;
|
|
}
|
|
|
|
/* Check error */
|
|
if (ZSTD_isError(zstd_ret)) {
|
|
set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
Given the two types of decompressors (defined in _zstdmodule.h):
|
|
|
|
typedef enum {
|
|
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
|
|
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
|
|
} decompress_type;
|
|
|
|
Decompress implementation for <D>, <E>, pseudo code:
|
|
|
|
initialize_output_buffer
|
|
while True:
|
|
decompress_data
|
|
set_object_flag # .eof for <D>, .at_frame_edge for <E>.
|
|
|
|
if output_buffer_exhausted:
|
|
if output_buffer_reached_max_length:
|
|
finish
|
|
grow_output_buffer
|
|
elif input_buffer_exhausted:
|
|
finish
|
|
|
|
ZSTD_decompressStream()'s size_t return value:
|
|
- 0 when a frame is completely decoded and fully flushed, zstd's internal
|
|
buffer has no data.
|
|
- An error code, which can be tested using ZSTD_isError().
|
|
- Or any other value > 0, which means there is still some decoding or
|
|
flushing to do to complete current frame.
|
|
|
|
Note, decompressing "an empty input" in any case will make it > 0.
|
|
|
|
<E> supports multiple frames, has an .at_frame_edge flag, it means both the
|
|
input and output streams are at a frame edge. The flag can be set by this
|
|
statement:
|
|
|
|
.at_frame_edge = (zstd_ret == 0) ? 1 : 0
|
|
|
|
But if decompressing "an empty input" at "a frame edge", zstd_ret will be
|
|
non-zero, then .at_frame_edge will be wrongly set to false. To solve this
|
|
problem, two AFE checks are needed to ensure that: when at "a frame edge",
|
|
empty input will not be decompressed.
|
|
|
|
// AFE check
|
|
if (self->at_frame_edge && in->pos == in->size) {
|
|
finish
|
|
}
|
|
|
|
In <E>, if .at_frame_edge is eventually set to true, but input stream has
|
|
unconsumed data (in->pos < in->size), then the outer function
|
|
stream_decompress() will set .at_frame_edge to false. In this case,
|
|
although the output stream is at a frame edge, for the caller, the input
|
|
stream is not at a frame edge, see below diagram. This behavior does not
|
|
affect the next AFE check, since (in->pos < in->size).
|
|
|
|
input stream: --------------|---
|
|
^
|
|
output stream: ====================|
|
|
^
|
|
*/
|
|
PyObject *
|
|
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
|
|
Py_ssize_t max_length,
|
|
Py_ssize_t initial_size,
|
|
decompress_type type)
|
|
{
|
|
size_t zstd_ret;
|
|
ZSTD_outBuffer out;
|
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
|
PyObject *ret;
|
|
|
|
/* The first AFE check for setting .at_frame_edge flag */
|
|
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
|
|
if (self->at_frame_edge && in->pos == in->size) {
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state == NULL) {
|
|
return NULL;
|
|
}
|
|
ret = mod_state->empty_bytes;
|
|
Py_INCREF(ret);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
/* Initialize the output buffer */
|
|
if (initial_size >= 0) {
|
|
if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
|
|
goto error;
|
|
}
|
|
}
|
|
else {
|
|
if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
|
|
goto error;
|
|
}
|
|
}
|
|
assert(out.pos == 0);
|
|
|
|
while (1) {
|
|
/* Decompress */
|
|
Py_BEGIN_ALLOW_THREADS
|
|
zstd_ret = ZSTD_decompressStream(self->dctx, &out, in);
|
|
Py_END_ALLOW_THREADS
|
|
|
|
/* Check error */
|
|
if (ZSTD_isError(zstd_ret)) {
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state != NULL) {
|
|
set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret);
|
|
}
|
|
goto error;
|
|
}
|
|
|
|
/* Set .eof/.af_frame_edge flag */
|
|
if (type == TYPE_DECOMPRESSOR) {
|
|
/* ZstdDecompressor class stops when a frame is decompressed */
|
|
if (zstd_ret == 0) {
|
|
self->eof = 1;
|
|
break;
|
|
}
|
|
}
|
|
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
|
|
/* decompress() function supports multiple frames */
|
|
self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
|
|
|
|
/* The second AFE check for setting .at_frame_edge flag */
|
|
if (self->at_frame_edge && in->pos == in->size) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Need to check out before in. Maybe zstd's internal buffer still has
|
|
a few bytes can be output, grow the buffer and continue. */
|
|
if (out.pos == out.size) {
|
|
/* Output buffer exhausted */
|
|
|
|
/* Output buffer reached max_length */
|
|
if (_OutputBuffer_ReachedMaxLength(&buffer, &out)) {
|
|
break;
|
|
}
|
|
|
|
/* Grow output buffer */
|
|
if (_OutputBuffer_Grow(&buffer, &out) < 0) {
|
|
goto error;
|
|
}
|
|
assert(out.pos == 0);
|
|
|
|
}
|
|
else if (in->pos == in->size) {
|
|
/* Finished */
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Return a bytes object */
|
|
ret = _OutputBuffer_Finish(&buffer, &out);
|
|
if (ret != NULL) {
|
|
return ret;
|
|
}
|
|
|
|
error:
|
|
_OutputBuffer_OnError(&buffer);
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
decompressor_reset_session(ZstdDecompressor *self,
|
|
decompress_type type)
|
|
{
|
|
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
|
|
// and ensure lock is always held
|
|
|
|
/* Reset variables */
|
|
self->in_begin = 0;
|
|
self->in_end = 0;
|
|
|
|
if (type == TYPE_DECOMPRESSOR) {
|
|
Py_CLEAR(self->unused_data);
|
|
}
|
|
|
|
/* Reset variables in one operation */
|
|
self->needs_input = 1;
|
|
self->at_frame_edge = 1;
|
|
self->eof = 0;
|
|
self->_unused_char_for_align = 0;
|
|
|
|
/* Resetting session never fail */
|
|
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
|
|
}
|
|
|
|
PyObject *
|
|
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
|
|
decompress_type type)
|
|
{
|
|
Py_ssize_t initial_buffer_size = -1;
|
|
ZSTD_inBuffer in;
|
|
PyObject *ret = NULL;
|
|
int use_input_buffer;
|
|
|
|
if (type == TYPE_DECOMPRESSOR) {
|
|
/* Check .eof flag */
|
|
if (self->eof) {
|
|
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
|
|
assert(ret == NULL);
|
|
goto success;
|
|
}
|
|
}
|
|
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
|
|
/* Fast path for the first frame */
|
|
if (self->at_frame_edge && self->in_begin == self->in_end) {
|
|
/* Read decompressed size */
|
|
uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
|
|
|
|
/* These two zstd constants always > PY_SSIZE_T_MAX:
|
|
ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
|
|
ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
|
|
|
|
Use ZSTD_findFrameCompressedSize() to check complete frame,
|
|
prevent allocating too much memory for small input chunk. */
|
|
|
|
if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
|
|
!ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
|
|
{
|
|
initial_buffer_size = (Py_ssize_t) decompressed_size;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Prepare input buffer w/wo unconsumed data */
|
|
if (self->in_begin == self->in_end) {
|
|
/* No unconsumed data */
|
|
use_input_buffer = 0;
|
|
|
|
in.src = data->buf;
|
|
in.size = data->len;
|
|
in.pos = 0;
|
|
}
|
|
else if (data->len == 0) {
|
|
/* Has unconsumed data, fast path for b'' */
|
|
assert(self->in_begin < self->in_end);
|
|
|
|
use_input_buffer = 1;
|
|
|
|
in.src = self->input_buffer + self->in_begin;
|
|
in.size = self->in_end - self->in_begin;
|
|
in.pos = 0;
|
|
}
|
|
else {
|
|
/* Has unconsumed data */
|
|
use_input_buffer = 1;
|
|
|
|
/* Unconsumed data size in input_buffer */
|
|
size_t used_now = self->in_end - self->in_begin;
|
|
assert(self->in_end > self->in_begin);
|
|
|
|
/* Number of bytes we can append to input buffer */
|
|
size_t avail_now = self->input_buffer_size - self->in_end;
|
|
assert(self->input_buffer_size >= self->in_end);
|
|
|
|
/* Number of bytes we can append if we move existing contents to
|
|
beginning of buffer */
|
|
size_t avail_total = self->input_buffer_size - used_now;
|
|
assert(self->input_buffer_size >= used_now);
|
|
|
|
if (avail_total < (size_t) data->len) {
|
|
char *tmp;
|
|
size_t new_size = used_now + data->len;
|
|
|
|
/* Allocate with new size */
|
|
tmp = PyMem_Malloc(new_size);
|
|
if (tmp == NULL) {
|
|
PyErr_NoMemory();
|
|
goto error;
|
|
}
|
|
|
|
/* Copy unconsumed data to the beginning of new buffer */
|
|
memcpy(tmp,
|
|
self->input_buffer + self->in_begin,
|
|
used_now);
|
|
|
|
/* Switch to new buffer */
|
|
PyMem_Free(self->input_buffer);
|
|
self->input_buffer = tmp;
|
|
self->input_buffer_size = new_size;
|
|
|
|
/* Set begin & end position */
|
|
self->in_begin = 0;
|
|
self->in_end = used_now;
|
|
}
|
|
else if (avail_now < (size_t) data->len) {
|
|
/* Move unconsumed data to the beginning.
|
|
Overlap is possible, so use memmove(). */
|
|
memmove(self->input_buffer,
|
|
self->input_buffer + self->in_begin,
|
|
used_now);
|
|
|
|
/* Set begin & end position */
|
|
self->in_begin = 0;
|
|
self->in_end = used_now;
|
|
}
|
|
|
|
/* Copy data to input buffer */
|
|
memcpy(self->input_buffer + self->in_end, data->buf, data->len);
|
|
self->in_end += data->len;
|
|
|
|
in.src = self->input_buffer + self->in_begin;
|
|
in.size = used_now + data->len;
|
|
in.pos = 0;
|
|
}
|
|
assert(in.pos == 0);
|
|
|
|
/* Decompress */
|
|
ret = decompress_impl(self, &in,
|
|
max_length, initial_buffer_size,
|
|
type);
|
|
if (ret == NULL) {
|
|
goto error;
|
|
}
|
|
|
|
/* Unconsumed input data */
|
|
if (in.pos == in.size) {
|
|
if (type == TYPE_DECOMPRESSOR) {
|
|
if (Py_SIZE(ret) == max_length || self->eof) {
|
|
self->needs_input = 0;
|
|
}
|
|
else {
|
|
self->needs_input = 1;
|
|
}
|
|
}
|
|
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
|
|
if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
|
|
self->needs_input = 0;
|
|
}
|
|
else {
|
|
self->needs_input = 1;
|
|
}
|
|
}
|
|
|
|
if (use_input_buffer) {
|
|
/* Clear input_buffer */
|
|
self->in_begin = 0;
|
|
self->in_end = 0;
|
|
}
|
|
}
|
|
else {
|
|
size_t data_size = in.size - in.pos;
|
|
|
|
self->needs_input = 0;
|
|
|
|
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
|
|
self->at_frame_edge = 0;
|
|
}
|
|
|
|
if (!use_input_buffer) {
|
|
/* Discard buffer if it's too small
|
|
(resizing it may needlessly copy the current contents) */
|
|
if (self->input_buffer != NULL &&
|
|
self->input_buffer_size < data_size)
|
|
{
|
|
PyMem_Free(self->input_buffer);
|
|
self->input_buffer = NULL;
|
|
self->input_buffer_size = 0;
|
|
}
|
|
|
|
/* Allocate if necessary */
|
|
if (self->input_buffer == NULL) {
|
|
self->input_buffer = PyMem_Malloc(data_size);
|
|
if (self->input_buffer == NULL) {
|
|
PyErr_NoMemory();
|
|
goto error;
|
|
}
|
|
self->input_buffer_size = data_size;
|
|
}
|
|
|
|
/* Copy unconsumed data */
|
|
memcpy(self->input_buffer, (char*)in.src + in.pos, data_size);
|
|
self->in_begin = 0;
|
|
self->in_end = data_size;
|
|
}
|
|
else {
|
|
/* Use input buffer */
|
|
self->in_begin += in.pos;
|
|
}
|
|
}
|
|
|
|
goto success;
|
|
|
|
error:
|
|
/* Reset decompressor's states/session */
|
|
decompressor_reset_session(self, type);
|
|
|
|
Py_CLEAR(ret);
|
|
success:
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
static PyObject *
|
|
_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
{
|
|
ZstdDecompressor *self;
|
|
self = PyObject_GC_New(ZstdDecompressor, type);
|
|
if (self == NULL) {
|
|
goto error;
|
|
}
|
|
|
|
self->inited = 0;
|
|
self->dict = NULL;
|
|
self->input_buffer = NULL;
|
|
self->input_buffer_size = 0;
|
|
self->in_begin = -1;
|
|
self->in_end = -1;
|
|
self->unused_data = NULL;
|
|
self->eof = 0;
|
|
|
|
/* needs_input flag */
|
|
self->needs_input = 1;
|
|
|
|
/* at_frame_edge flag */
|
|
self->at_frame_edge = 1;
|
|
|
|
/* Decompression context */
|
|
self->dctx = ZSTD_createDCtx();
|
|
if (self->dctx == NULL) {
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state != NULL) {
|
|
PyErr_SetString(mod_state->ZstdError,
|
|
"Unable to create ZSTD_DCtx instance.");
|
|
}
|
|
goto error;
|
|
}
|
|
|
|
return (PyObject*)self;
|
|
|
|
error:
|
|
if (self != NULL) {
|
|
PyObject_GC_Del(self);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void
|
|
ZstdDecompressor_dealloc(PyObject *ob)
|
|
{
|
|
ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
|
|
|
|
PyObject_GC_UnTrack(self);
|
|
|
|
/* Free decompression context */
|
|
ZSTD_freeDCtx(self->dctx);
|
|
|
|
/* Py_CLEAR the dict after free decompression context */
|
|
Py_CLEAR(self->dict);
|
|
|
|
/* Free unconsumed input data buffer */
|
|
PyMem_Free(self->input_buffer);
|
|
|
|
/* Free unused data */
|
|
Py_CLEAR(self->unused_data);
|
|
|
|
PyTypeObject *tp = Py_TYPE(self);
|
|
PyObject_GC_Del(ob);
|
|
Py_DECREF(tp);
|
|
}
|
|
|
|
/*[clinic input]
|
|
_zstd.ZstdDecompressor.__init__
|
|
|
|
zstd_dict: object = None
|
|
A ZstdDict object, a pre-trained zstd dictionary.
|
|
options: object = None
|
|
A dict object that contains advanced decompression parameters.
|
|
|
|
Create a decompressor object for decompressing data incrementally.
|
|
|
|
Thread-safe at method level. For one-shot decompression, use the decompress()
|
|
function instead.
|
|
[clinic start generated code]*/
|
|
|
|
static int
|
|
_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
|
|
PyObject *zstd_dict, PyObject *options)
|
|
/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
|
|
{
|
|
/* Only called once */
|
|
if (self->inited) {
|
|
PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
|
|
return -1;
|
|
}
|
|
self->inited = 1;
|
|
|
|
/* Load dictionary to decompression context */
|
|
if (zstd_dict != Py_None) {
|
|
if (_PyZstd_load_d_dict(self, zstd_dict) < 0) {
|
|
return -1;
|
|
}
|
|
|
|
/* Py_INCREF the dict */
|
|
Py_INCREF(zstd_dict);
|
|
self->dict = zstd_dict;
|
|
}
|
|
|
|
/* Set option to decompression context */
|
|
if (options != Py_None) {
|
|
if (_PyZstd_set_d_parameters(self, options) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// We can only start tracking self with the GC once self->dict is set.
|
|
PyObject_GC_Track(self);
|
|
return 0;
|
|
}
|
|
|
|
/*[clinic input]
|
|
@critical_section
|
|
@getter
|
|
_zstd.ZstdDecompressor.unused_data
|
|
|
|
A bytes object of un-consumed input data.
|
|
|
|
When ZstdDecompressor object stops after a frame is
|
|
decompressed, unused input data after the frame. Otherwise this will be b''.
|
|
[clinic start generated code]*/
|
|
|
|
static PyObject *
|
|
_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
|
|
/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/
|
|
{
|
|
PyObject *ret;
|
|
|
|
/* Thread-safe code */
|
|
Py_BEGIN_CRITICAL_SECTION(self);
|
|
|
|
if (!self->eof) {
|
|
_zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
|
|
if (mod_state == NULL) {
|
|
return NULL;
|
|
}
|
|
ret = mod_state->empty_bytes;
|
|
Py_INCREF(ret);
|
|
}
|
|
else {
|
|
if (self->unused_data == NULL) {
|
|
self->unused_data = PyBytes_FromStringAndSize(
|
|
self->input_buffer + self->in_begin,
|
|
self->in_end - self->in_begin);
|
|
ret = self->unused_data;
|
|
Py_XINCREF(ret);
|
|
}
|
|
else {
|
|
ret = self->unused_data;
|
|
Py_INCREF(ret);
|
|
}
|
|
}
|
|
|
|
Py_END_CRITICAL_SECTION();
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*[clinic input]
|
|
_zstd.ZstdDecompressor.decompress
|
|
|
|
data: Py_buffer
|
|
A bytes-like object, zstd data to be decompressed.
|
|
max_length: Py_ssize_t = -1
|
|
Maximum size of returned data. When it is negative, the size of
|
|
output buffer is unlimited. When it is nonnegative, returns at
|
|
most max_length bytes of decompressed data.
|
|
|
|
Decompress *data*, returning uncompressed bytes if possible, or b'' otherwise.
|
|
|
|
If *max_length* is nonnegative, returns at most *max_length* bytes of
|
|
decompressed data. If this limit is reached and further output can be
|
|
produced, *self.needs_input* will be set to ``False``. In this case, the next
|
|
call to *decompress()* may provide *data* as b'' to obtain more of the output.
|
|
|
|
If all of the input data was decompressed and returned (either because this
|
|
was less than *max_length* bytes, or because *max_length* was negative),
|
|
*self.needs_input* will be set to True.
|
|
|
|
Attempting to decompress data after the end of a frame is reached raises an
|
|
EOFError. Any data found after the end of the frame is ignored and saved in
|
|
the self.unused_data attribute.
|
|
[clinic start generated code]*/
|
|
|
|
static PyObject *
|
|
_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
|
|
Py_buffer *data,
|
|
Py_ssize_t max_length)
|
|
/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
|
|
{
|
|
PyObject *ret;
|
|
/* Thread-safe code */
|
|
Py_BEGIN_CRITICAL_SECTION(self);
|
|
|
|
ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
|
|
Py_END_CRITICAL_SECTION();
|
|
return ret;
|
|
}
|
|
|
|
#define clinic_state() (get_zstd_state_from_type(type))
|
|
#include "clinic/decompressor.c.h"
|
|
#undef clinic_state
|
|
|
|
static PyMethodDef ZstdDecompressor_methods[] = {
|
|
_ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
|
|
|
|
{0}
|
|
};
|
|
|
|
PyDoc_STRVAR(ZstdDecompressor_eof_doc,
|
|
"True means the end of the first frame has been reached. If decompress data\n"
|
|
"after that, an EOFError exception will be raised.");
|
|
|
|
PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
|
|
"If the max_length output limit in .decompress() method has been reached, and\n"
|
|
"the decompressor has (or may has) unconsumed input data, it will be set to\n"
|
|
"False. In this case, pass b'' to .decompress() method may output further data.");
|
|
|
|
static PyMemberDef ZstdDecompressor_members[] = {
|
|
{"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
|
|
Py_READONLY, ZstdDecompressor_eof_doc},
|
|
|
|
{"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input),
|
|
Py_READONLY, ZstdDecompressor_needs_input_doc},
|
|
|
|
{0}
|
|
};
|
|
|
|
static PyGetSetDef ZstdDecompressor_getset[] = {
|
|
_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
|
|
|
|
{0}
|
|
};
|
|
|
|
static int
|
|
ZstdDecompressor_traverse(PyObject *ob, visitproc visit, void *arg)
|
|
{
|
|
ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
|
|
Py_VISIT(self->dict);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
ZstdDecompressor_clear(PyObject *ob)
|
|
{
|
|
ZstdDecompressor *self = ZstdDecompressor_CAST(ob);
|
|
Py_CLEAR(self->dict);
|
|
Py_CLEAR(self->unused_data);
|
|
return 0;
|
|
}
|
|
|
|
static PyType_Slot ZstdDecompressor_slots[] = {
|
|
{Py_tp_new, _zstd_ZstdDecompressor_new},
|
|
{Py_tp_dealloc, ZstdDecompressor_dealloc},
|
|
{Py_tp_init, _zstd_ZstdDecompressor___init__},
|
|
{Py_tp_methods, ZstdDecompressor_methods},
|
|
{Py_tp_members, ZstdDecompressor_members},
|
|
{Py_tp_getset, ZstdDecompressor_getset},
|
|
{Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
|
|
{Py_tp_traverse, ZstdDecompressor_traverse},
|
|
{Py_tp_clear, ZstdDecompressor_clear},
|
|
{0}
|
|
};
|
|
|
|
PyType_Spec ZstdDecompressor_type_spec = {
|
|
.name = "_zstd.ZstdDecompressor",
|
|
.basicsize = sizeof(ZstdDecompressor),
|
|
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
|
|
.slots = ZstdDecompressor_slots,
|
|
};
|