cpython/Modules/_zstd/_zstdmodule.c
Emma Smith 3b4333583f
gh-132983: Introduce _zstd bindings module (GH-133027)
* Add _zstd module for https://peps.python.org/pep-0784/

This commit introduces the `_zstd` module, with bindings to libzstd from
the pyzstd project. It also includes the unix build system configuration.
Windows build system support will be integrated independently as it
depends on integration with cpython-source-deps.

* Add _zstd to modules

* Fix path for compression.zstd module

* Ignore _zstd module like _io

* Expand module state macros to improve code quality

Also removes module state references from the classes in the _zstd
module and instead uses PyType_GetModuleState()

* Remove backticks suggested in review

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

* Use critical sections to lock object state

This should avoid races and deadlocks.

* Remove compress/decompress and mark module as not reliant on the GIL

The `compress`/`decompress` functions will be moved to Python code for simplicity.
C implementations can always be re-added in the future.

Also, mark _zstd as not requiring the GIL.

* Lift critical section to avoid clang warning

* Respond to comments by picnixz

* Call out pyzstd explicitly in license description

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>

* Use a much more robust implementation...

... for `get_zstd_state_from_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Use PyList_GetItemRef for thread safety purposes

* Use a macro for the minimum supported version

* Remove const from primitive types

* Use PyMem_New in another spot

* Simplify error handling in _get_frame_size

* Another simplification of error handling in get_frame_info

* Rename _module_state to mod_state

* Rewrite comment explaining the context of the code

* Add link to pyzstd

* Add TODO about refactoring dict training code

* Use PyModule_AddObjectRef over PyModule_AddObject

PyModule_AddObject is soft-deprecated, so we should use PyModule_AddObjectRef

* Check result of OutputBufferGrow

* Simplify return logic in `add_constant_to_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Ignore return value of _zstd_clear()

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Remove redundant comments

* Remove __reduce__ from ZstdDict

We should instead document that to pickle a dictionary a user should use
the `.dict_content` attribute.

* Use PyUnicode_FromFormat instead of a buffer

* Don't use C constants/types in error messages

* Make error messages easier to understand for Python users

* Lower minimum required version to 1.4.0

* Use casts and make slot function signatures correct

* Be consistent with CPython on const usage

* Make else clauses in line with PEP 7

* Fix over-indented blocks in argument clinic

* Add critical section around ZSTD_DCtx_setParameter

* Add a TODO about refactoring critical sections

* Use Py_UNREACHABLE

* Move bytes operations out of Py_BEGIN_ALLOW_THREADS

* Add TODO about ensuring a lock is held

* Remove asserts that may not be correct

* Add TODO to make ZstdDict and others GC objects

* Make objects GC tracked

* Remove unused include

* Fix some memory issues

* Fix refleaks on module and in ZstdDict

* Update configure to check for ZDICT_finalizeDictionary

* Properly check version in configure

* exit(1) if check fails

* Use AC_RUN_IFELSE

* Use a define() to re-use version check

* Actually properly set _zstd module status based on version

---------

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
2025-05-04 01:29:55 +00:00

914 lines
27 KiB
C

/*
Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
#include "_zstdmodule.h"
/*[clinic input]
module _zstd
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b5f5587aac15c14]*/
#include "clinic/_zstdmodule.c.h"
/* Raise state->ZstdError for a failed libzstd call.

   `type` selects the message template describing which operation failed;
   `zstd_ret` is the error code returned by libzstd, whose textual name
   (from ZSTD_getErrorName) is substituted into the template. */
void
set_zstd_error(const _zstd_state* const state,
               error_type type, size_t zstd_ret)
{
    const char *format;

    assert(ZSTD_isError(zstd_ret));

    switch (type) {
        case ERR_DECOMPRESS:
            format = "Unable to decompress zstd data: %s";
            break;
        case ERR_COMPRESS:
            format = "Unable to compress zstd data: %s";
            break;
        case ERR_SET_PLEDGED_INPUT_SIZE:
            format = "Unable to set pledged uncompressed content size: %s";
            break;

        case ERR_LOAD_D_DICT:
            format = "Unable to load zstd dictionary or prefix for decompression: %s";
            break;
        case ERR_LOAD_C_DICT:
            format = "Unable to load zstd dictionary or prefix for compression: %s";
            break;

        case ERR_GET_C_BOUNDS:
            format = "Unable to get zstd compression parameter bounds: %s";
            break;
        case ERR_GET_D_BOUNDS:
            format = "Unable to get zstd decompression parameter bounds: %s";
            break;
        case ERR_SET_C_LEVEL:
            format = "Unable to set zstd compression level: %s";
            break;

        case ERR_TRAIN_DICT:
            format = "Unable to train zstd dictionary: %s";
            break;
        case ERR_FINALIZE_DICT:
            format = "Unable to finalize zstd dictionary: %s";
            break;

        default:
            /* Every error_type used by the module is handled above. */
            Py_UNREACHABLE();
    }

    PyErr_Format(state->ZstdError, format, ZSTD_getErrorName(zstd_ret));
}
/* One row of the cp_list/dp_list lookup tables: pairs a zstd parameter
   identifier with the printable name used in error messages. */
typedef struct {
/* Numeric parameter identifier (a ZSTD_c_* or ZSTD_d_* value). */
int parameter;
/* NUL-terminated display name; must fit in 32 bytes including the NUL. */
char parameter_name[32];
} ParameterInfo;
static const ParameterInfo cp_list[] =
{
{ZSTD_c_compressionLevel, "compressionLevel"},
{ZSTD_c_windowLog, "windowLog"},
{ZSTD_c_hashLog, "hashLog"},
{ZSTD_c_chainLog, "chainLog"},
{ZSTD_c_searchLog, "searchLog"},
{ZSTD_c_minMatch, "minMatch"},
{ZSTD_c_targetLength, "targetLength"},
{ZSTD_c_strategy, "strategy"},
{ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"},
{ZSTD_c_ldmHashLog, "ldmHashLog"},
{ZSTD_c_ldmMinMatch, "ldmMinMatch"},
{ZSTD_c_ldmBucketSizeLog, "ldmBucketSizeLog"},
{ZSTD_c_ldmHashRateLog, "ldmHashRateLog"},
{ZSTD_c_contentSizeFlag, "contentSizeFlag"},
{ZSTD_c_checksumFlag, "checksumFlag"},
{ZSTD_c_dictIDFlag, "dictIDFlag"},
{ZSTD_c_nbWorkers, "nbWorkers"},
{ZSTD_c_jobSize, "jobSize"},
{ZSTD_c_overlapLog, "overlapLog"}
};
static const ParameterInfo dp_list[] =
{
{ZSTD_d_windowLogMax, "windowLogMax"}
};
void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v)
{
ParameterInfo const *list;
int list_size;
char const *name;
char *type;
ZSTD_bounds bounds;
int i;
char pos_msg[128];
if (is_compress) {
list = cp_list;
list_size = Py_ARRAY_LENGTH(cp_list);
type = "compression";
}
else {
list = dp_list;
list_size = Py_ARRAY_LENGTH(dp_list);
type = "decompression";
}
/* Find parameter's name */
name = NULL;
for (i = 0; i < list_size; i++) {
if (key_v == (list+i)->parameter) {
name = (list+i)->parameter_name;
break;
}
}
/* Unknown parameter */
if (name == NULL) {
PyOS_snprintf(pos_msg, sizeof(pos_msg),
"unknown parameter (key %d)", key_v);
name = pos_msg;
}
/* Get parameter bounds */
if (is_compress) {
bounds = ZSTD_cParam_getBounds(key_v);
}
else {
bounds = ZSTD_dParam_getBounds(key_v);
}
if (ZSTD_isError(bounds.error)) {
PyErr_Format(state->ZstdError,
"Zstd %s parameter \"%s\" is invalid. (zstd v%s)",
type, name, ZSTD_versionString());
return;
}
/* Error message */
PyErr_Format(state->ZstdError,
"Error when setting zstd %s parameter \"%s\", it "
"should %d <= value <= %d, provided value is %d. "
"(zstd v%s, %d-bit build)",
type, name,
bounds.lowerBound, bounds.upperBound, value_v,
ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t));
}
/* Return the _zstd_state stored in `module`.
   In debug builds, asserts that the module has state. */
static inline _zstd_state*
get_zstd_state(PyObject *module)
{
    _zstd_state *mod_state = (_zstd_state *)PyModule_GetState(module);
    assert(mod_state != NULL);
    return mod_state;
}
/*[clinic input]
_zstd._train_dict
samples_bytes: PyBytesObject
Concatenation of samples.
samples_size_list: object(subclass_of='&PyList_Type')
List of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
/
Internal function, train a zstd dictionary on sample data.
[clinic start generated code]*/
static PyObject *
_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
                       PyObject *samples_size_list, Py_ssize_t dict_size)
/*[clinic end generated code: output=ee53c34c8f77886b input=b21d092c695a3a81]*/
{
    /* Train a zstd dictionary of at most dict_size bytes.

       samples_bytes is the concatenation of all samples and
       samples_size_list holds the size of each sample, in order; the sizes
       must add up to exactly len(samples_bytes).

       Returns a new bytes object holding the dictionary, or NULL with an
       exception set (ValueError for bad arguments, ZstdError when libzstd
       fails, MemoryError on allocation failure). */

    // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict
    // are pretty similar. We should see if we can refactor them to share that code.
    Py_ssize_t chunks_number;
    size_t *chunk_sizes = NULL;
    PyObject *dst_dict_bytes = NULL;
    size_t zstd_ret;
    size_t samples_size;
    size_t sizes_sum;
    Py_ssize_t i;

    /* Check arguments */
    if (dict_size <= 0) {
        PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
        return NULL;
    }

    chunks_number = Py_SIZE(samples_size_list);
    if ((size_t) chunks_number > UINT32_MAX) {
        PyErr_Format(PyExc_ValueError,
                     "The number of samples should be <= %u.", UINT32_MAX);
        return NULL;
    }

    /* Prepare chunk_sizes */
    chunk_sizes = PyMem_New(size_t, chunks_number);
    if (chunk_sizes == NULL) {
        PyErr_NoMemory();
        goto error;
    }

    samples_size = (size_t)Py_SIZE(samples_bytes);
    sizes_sum = 0;
    for (i = 0; i < chunks_number; i++) {
        PyObject *size = PyList_GetItemRef(samples_size_list, i);
        if (size == NULL) {
            /* The list shrank while we were iterating; the IndexError set
               by PyList_GetItemRef is propagated. */
            goto error;
        }
        chunk_sizes[i] = PyLong_AsSize_t(size);
        Py_DECREF(size);
        if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
            PyErr_Format(PyExc_ValueError,
                         "Items in samples_size_list should be an int "
                         "object, with a value between 0 and %zu.", SIZE_MAX);
            goto error;
        }
        /* Reject as soon as the running total would exceed the buffer.
           This keeps the sum from wrapping around, which could otherwise
           let an inconsistent size list pass the check below and make
           libzstd read past the end of samples_bytes. */
        if (chunk_sizes[i] > samples_size - sizes_sum) {
            PyErr_SetString(PyExc_ValueError,
                            "The samples size list doesn't match the concatenation's size.");
            goto error;
        }
        sizes_sum += chunk_sizes[i];
    }

    if (sizes_sum != samples_size) {
        PyErr_SetString(PyExc_ValueError,
                        "The samples size list doesn't match the concatenation's size.");
        goto error;
    }

    /* Allocate dict buffer */
    dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
    if (dst_dict_bytes == NULL) {
        goto error;
    }

    /* Train the dictionary.
       Buffer pointers are fetched before releasing the thread state. */
    char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes);
    char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
    Py_BEGIN_ALLOW_THREADS
    zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size,
                                     samples_buffer,
                                     chunk_sizes, (uint32_t)chunks_number);
    Py_END_ALLOW_THREADS

    /* Check zstd dict error */
    if (ZDICT_isError(zstd_ret)) {
        _zstd_state* const mod_state = get_zstd_state(module);
        set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret);
        goto error;
    }

    /* Shrink the bytes object to the size libzstd actually produced.
       On failure _PyBytes_Resize sets dst_dict_bytes to NULL. */
    if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
        goto error;
    }

    goto success;

error:
    Py_CLEAR(dst_dict_bytes);

success:
    PyMem_Free(chunk_sizes);
    return dst_dict_bytes;
}
/*[clinic input]
_zstd._finalize_dict
custom_dict_bytes: PyBytesObject
Custom dictionary content.
samples_bytes: PyBytesObject
Concatenation of samples.
samples_size_list: object(subclass_of='&PyList_Type')
List of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
compression_level: int
Optimize for a specific zstd compression level, 0 means default.
/
Internal function, finalize a zstd dictionary.
[clinic start generated code]*/
static PyObject *
_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
PyBytesObject *samples_bytes,
PyObject *samples_size_list, Py_ssize_t dict_size,
int compression_level)
/*[clinic end generated code: output=9c2a7d8c845cee93 input=08531a803d87c56f]*/
{
Py_ssize_t chunks_number;
size_t *chunk_sizes = NULL;
PyObject *dst_dict_bytes = NULL;
size_t zstd_ret;
ZDICT_params_t params;
Py_ssize_t sizes_sum;
Py_ssize_t i;
/* Check arguments */
if (dict_size <= 0) {
PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
return NULL;
}
chunks_number = Py_SIZE(samples_size_list);
if ((size_t) chunks_number > UINT32_MAX) {
PyErr_Format(PyExc_ValueError,
"The number of samples should be <= %u.", UINT32_MAX);
return NULL;
}
/* Prepare chunk_sizes */
chunk_sizes = PyMem_New(size_t, chunks_number);
if (chunk_sizes == NULL) {
PyErr_NoMemory();
goto error;
}
sizes_sum = 0;
for (i = 0; i < chunks_number; i++) {
PyObject *size = PyList_GET_ITEM(samples_size_list, i);
chunk_sizes[i] = PyLong_AsSize_t(size);
if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
PyErr_Format(PyExc_ValueError,
"Items in samples_size_list should be an int "
"object, with a value between 0 and %u.", SIZE_MAX);
goto error;
}
sizes_sum += chunk_sizes[i];
}
if (sizes_sum != Py_SIZE(samples_bytes)) {
PyErr_SetString(PyExc_ValueError,
"The samples size list doesn't match the concatenation's size.");
goto error;
}
/* Allocate dict buffer */
dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
if (dst_dict_bytes == NULL) {
goto error;
}
/* Parameters */
/* Optimize for a specific zstd compression level, 0 means default. */
params.compressionLevel = compression_level;
/* Write log to stderr, 0 = none. */
params.notificationLevel = 0;
/* Force dictID value, 0 means auto mode (32-bits random value). */
params.dictID = 0;
/* Finalize the dictionary */
Py_BEGIN_ALLOW_THREADS
zstd_ret = ZDICT_finalizeDictionary(
PyBytes_AS_STRING(dst_dict_bytes), dict_size,
PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes),
PyBytes_AS_STRING(samples_bytes), chunk_sizes,
(uint32_t)chunks_number, params);
Py_END_ALLOW_THREADS
/* Check zstd dict error */
if (ZDICT_isError(zstd_ret)) {
_zstd_state* const mod_state = get_zstd_state(module);
set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret);
goto error;
}
/* Resize dict_buffer */
if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
goto error;
}
goto success;
error:
Py_CLEAR(dst_dict_bytes);
success:
PyMem_Free(chunk_sizes);
return dst_dict_bytes;
}
/*[clinic input]
_zstd._get_param_bounds
is_compress: bool
True for CParameter, False for DParameter.
parameter: int
The parameter to get bounds.
Internal function, get CParameter/DParameter bounds.
[clinic start generated code]*/
static PyObject *
_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
                             int parameter)
/*[clinic end generated code: output=b751dc710f89ef55 input=fb21ff96aff65df1]*/
{
    /* Query libzstd for the accepted range of `parameter` and return it as
       a (lower, upper) tuple. Raises ZstdError when libzstd reports an
       error for the requested parameter. */
    ZSTD_bounds range = is_compress ? ZSTD_cParam_getBounds(parameter)
                                    : ZSTD_dParam_getBounds(parameter);

    if (ZSTD_isError(range.error)) {
        _zstd_state* const mod_state = get_zstd_state(module);
        set_zstd_error(mod_state,
                       is_compress ? ERR_GET_C_BOUNDS : ERR_GET_D_BOUNDS,
                       range.error);
        return NULL;
    }

    return Py_BuildValue("ii", range.lowerBound, range.upperBound);
}
/*[clinic input]
_zstd.get_frame_size
frame_buffer: Py_buffer
A bytes-like object, it should start from the beginning of a frame,
and contains at least one complete frame.
Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.
It will iterate all blocks' headers within a frame, to accumulate the frame size.
[clinic start generated code]*/
static PyObject *
_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/
{
    /* Return the compressed size of the zstd frame at the start of
       frame_buffer as an int, or raise ZstdError if libzstd cannot
       determine it. */
    size_t compressed_size = ZSTD_findFrameCompressedSize(frame_buffer->buf,
                                                          frame_buffer->len);

    if (!ZSTD_isError(compressed_size)) {
        return PyLong_FromSize_t(compressed_size);
    }

    _zstd_state* const mod_state = get_zstd_state(module);
    PyErr_Format(mod_state->ZstdError,
                 "Error when finding the compressed size of a zstd frame. "
                 "Make sure the frame_buffer argument starts from the "
                 "beginning of a frame, and its length not less than this "
                 "complete frame. Zstd error message: %s.",
                 ZSTD_getErrorName(compressed_size));
    return NULL;
}
/*[clinic input]
_zstd._get_frame_info
frame_buffer: Py_buffer
A bytes-like object, containing the header of a zstd frame.
Internal function, get zstd frame infomation from a frame header.
[clinic start generated code]*/
static PyObject *
_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/
{
    /* Return (decompressed_size, dict_id) read from a zstd frame header.
       decompressed_size is None when the header does not record it.
       Raises ZstdError when the header cannot be parsed. */
    uint64_t content_size = ZSTD_getFrameContentSize(frame_buffer->buf,
                                                     frame_buffer->len);

    /* libzstd reserves two sentinel values:
         ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
         ZSTD_CONTENTSIZE_ERROR   (0ULL - 2) */
    if (content_size == ZSTD_CONTENTSIZE_ERROR) {
        _zstd_state* const mod_state = get_zstd_state(module);
        PyErr_SetString(mod_state->ZstdError,
                        "Error when getting information from the header of "
                        "a zstd frame. Make sure the frame_buffer argument "
                        "starts from the beginning of a frame, and its length "
                        "not less than the frame header (6~18 bytes).");
        return NULL;
    }

    uint32_t frame_dict_id = ZSTD_getDictID_fromFrame(frame_buffer->buf,
                                                      frame_buffer->len);

    if (content_size != ZSTD_CONTENTSIZE_UNKNOWN) {
        return Py_BuildValue("KI", content_size, frame_dict_id);
    }
    return Py_BuildValue("OI", Py_None, frame_dict_id);
}
/*[clinic input]
_zstd._set_parameter_types
c_parameter_type: object(subclass_of='&PyType_Type')
CParameter IntEnum type object
d_parameter_type: object(subclass_of='&PyType_Type')
DParameter IntEnum type object
Internal function, set CParameter/DParameter types for validity check.
[clinic start generated code]*/
static PyObject *
_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
                                PyObject *d_parameter_type)
/*[clinic end generated code: output=a13d4890ccbd2873 input=3e7d0d37c3a1045a]*/
{
    /* Store the CParameter and DParameter type objects in the module state,
       replacing any previously registered ones. Returns None, or raises
       ValueError if either argument is not a type. */
    _zstd_state* const mod_state = get_zstd_state(module);

    if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) {
        PyErr_SetString(PyExc_ValueError,
                        "The two arguments should be CParameter and "
                        "DParameter types.");
        return NULL;
    }

    /* Install the new reference before dropping the old one. Releasing the
       old type may run arbitrary code, which must never observe a state
       field that points at an object we no longer own. */
    Py_INCREF(c_parameter_type);
    Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type);

    Py_INCREF(d_parameter_type);
    Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type);

    Py_RETURN_NONE;
}
/* Module-level function table, referenced by _zstdmodule.m_methods.
   Each entry is a *_METHODDEF macro; these are expected to come from the
   Argument Clinic header included above (clinic/_zstdmodule.c.h).
   The {0} entry terminates the table. */
static PyMethodDef _zstd_methods[] = {
_ZSTD__TRAIN_DICT_METHODDEF
_ZSTD__FINALIZE_DICT_METHODDEF
_ZSTD__GET_PARAM_BOUNDS_METHODDEF
_ZSTD_GET_FRAME_SIZE_METHODDEF
_ZSTD__GET_FRAME_INFO_METHODDEF
_ZSTD__SET_PARAMETER_TYPES_METHODDEF
{0}
};
/* Add the integer constant `macro` to `module` under its own spelling
   prefixed with an underscore (e.g. ZSTD_fast becomes "_ZSTD_fast").
   On failure this executes `return -1`, so it may only be used inside a
   function returning int. */
#define ADD_INT_PREFIX_MACRO(module, macro) \
do { \
if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \
return -1; \
} \
} while(0)
/* Expose the zstd compression/decompression parameter identifiers and the
   ZSTD_strategy values as module-level int constants, each named after the
   C identifier with a leading underscore.
   Returns 0 on success, -1 with an exception set on failure. */
static int
add_parameters(PyObject *module)
{
    /* If add new parameters, please also add to cp_list/dp_list above. */
#define ZSTD_INT_ENTRY(constant) {"_" #constant, constant}
    static const struct {
        const char *name;
        int value;
    } constants[] = {
        /* Compression parameters */
        ZSTD_INT_ENTRY(ZSTD_c_compressionLevel),
        ZSTD_INT_ENTRY(ZSTD_c_windowLog),
        ZSTD_INT_ENTRY(ZSTD_c_hashLog),
        ZSTD_INT_ENTRY(ZSTD_c_chainLog),
        ZSTD_INT_ENTRY(ZSTD_c_searchLog),
        ZSTD_INT_ENTRY(ZSTD_c_minMatch),
        ZSTD_INT_ENTRY(ZSTD_c_targetLength),
        ZSTD_INT_ENTRY(ZSTD_c_strategy),

        ZSTD_INT_ENTRY(ZSTD_c_enableLongDistanceMatching),
        ZSTD_INT_ENTRY(ZSTD_c_ldmHashLog),
        ZSTD_INT_ENTRY(ZSTD_c_ldmMinMatch),
        ZSTD_INT_ENTRY(ZSTD_c_ldmBucketSizeLog),
        ZSTD_INT_ENTRY(ZSTD_c_ldmHashRateLog),

        ZSTD_INT_ENTRY(ZSTD_c_contentSizeFlag),
        ZSTD_INT_ENTRY(ZSTD_c_checksumFlag),
        ZSTD_INT_ENTRY(ZSTD_c_dictIDFlag),

        ZSTD_INT_ENTRY(ZSTD_c_nbWorkers),
        ZSTD_INT_ENTRY(ZSTD_c_jobSize),
        ZSTD_INT_ENTRY(ZSTD_c_overlapLog),

        /* Decompression parameters */
        ZSTD_INT_ENTRY(ZSTD_d_windowLogMax),

        /* ZSTD_strategy enum */
        ZSTD_INT_ENTRY(ZSTD_fast),
        ZSTD_INT_ENTRY(ZSTD_dfast),
        ZSTD_INT_ENTRY(ZSTD_greedy),
        ZSTD_INT_ENTRY(ZSTD_lazy),
        ZSTD_INT_ENTRY(ZSTD_lazy2),
        ZSTD_INT_ENTRY(ZSTD_btlazy2),
        ZSTD_INT_ENTRY(ZSTD_btopt),
        ZSTD_INT_ENTRY(ZSTD_btultra),
        ZSTD_INT_ENTRY(ZSTD_btultra2),
    };
#undef ZSTD_INT_ENTRY

    /* Register in table order; stop at the first failure. */
    for (size_t idx = 0; idx < Py_ARRAY_LENGTH(constants); idx++) {
        if (PyModule_AddIntConstant(module, constants[idx].name,
                                    constants[idx].value) < 0) {
            return -1;
        }
    }
    return 0;
}
/* Build a (major, minor, release) tuple from ZSTD_versionNumber(), which
   is decoded here as major*10000 + minor*100 + release.
   Returns a new reference, or NULL with an exception set. */
static inline PyObject *
get_zstd_version_info(void)
{
    const uint32_t version = ZSTD_versionNumber();
    const uint32_t major = version / 10000;
    const uint32_t minor = (version / 100) % 100;
    const uint32_t release = version % 100;

    return Py_BuildValue("III", major, minor, release);
}
/* Populate `module` with version information, the parameter constants and
   assorted private tuples describing the linked libzstd and this build.
   Returns 0 on success, -1 with an exception set on failure. */
static inline int
add_vars_to_module(PyObject *module)
{
    PyObject *value;
    int status;

    /* zstd_version, a str. */
    if (PyModule_AddStringConstant(module, "zstd_version",
                                   ZSTD_versionString()) < 0) {
        return -1;
    }

    /* zstd_version_info, a tuple.
       PyModule_AddObjectRef does not steal the reference, so ours is
       released whether or not the call succeeded. */
    value = get_zstd_version_info();
    status = PyModule_AddObjectRef(module, "zstd_version_info", value);
    Py_XDECREF(value);
    if (status < 0) {
        return -1;
    }

    /* Add zstd parameters */
    if (add_parameters(module) < 0) {
        return -1;
    }

    /* _compressionLevel_values: (default, min, max)
       ZSTD_defaultCLevel() was added in zstd v1.5.0 */
#if ZSTD_VERSION_NUMBER < 10500
    const int default_level = ZSTD_CLEVEL_DEFAULT;
#else
    const int default_level = ZSTD_defaultCLevel();
#endif
    value = Py_BuildValue("iii",
                          default_level,
                          ZSTD_minCLevel(),
                          ZSTD_maxCLevel());
    status = PyModule_AddObjectRef(module, "_compressionLevel_values", value);
    Py_XDECREF(value);
    if (status < 0) {
        return -1;
    }

    /* _ZSTD_CStreamSizes */
    value = Py_BuildValue("II",
                          (uint32_t)ZSTD_CStreamInSize(),
                          (uint32_t)ZSTD_CStreamOutSize());
    status = PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", value);
    Py_XDECREF(value);
    if (status < 0) {
        return -1;
    }

    /* _ZSTD_DStreamSizes */
    value = Py_BuildValue("II",
                          (uint32_t)ZSTD_DStreamInSize(),
                          (uint32_t)ZSTD_DStreamOutSize());
    status = PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", value);
    Py_XDECREF(value);
    if (status < 0) {
        return -1;
    }

    /* _ZSTD_CONFIG */
    /* Use mremap output buffer */
#if defined(HAVE_MREMAP)
    PyObject *mremap_flag = Py_True;
#else
    PyObject *mremap_flag = Py_False;
#endif
    value = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c",
                          Py_False,
                          Py_True,
                          mremap_flag);
    status = PyModule_AddObjectRef(module, "_ZSTD_CONFIG", value);
    Py_XDECREF(value);
    if (status < 0) {
        return -1;
    }

    return 0;
}
/* Create the str object "STR" and store it in mod_state->str_STR.
   Requires a variable named `mod_state` in scope. On failure this executes
   `return -1`, so it may only be used inside a function returning int. */
#define ADD_STR_TO_STATE_MACRO(STR) \
do { \
mod_state->str_##STR = PyUnicode_FromString(#STR); \
if (mod_state->str_##STR == NULL) { \
return -1; \
} \
} while(0)
/* Create a heap type from `type_spec`, publish it on `module` as `name`,
   and hand the newly created reference to *dest.
   Returns 0 on success; on failure returns -1 with an exception set and
   leaves *dest untouched. */
static inline int
add_type_to_module(PyObject *module, const char *name,
                   PyType_Spec *type_spec, PyTypeObject **dest)
{
    PyObject *type_obj = PyType_FromModuleAndSpec(module, type_spec, NULL);

    /* A NULL type_obj (creation failed) is reported by
       PyModule_AddObjectRef as an error as well. */
    if (PyModule_AddObjectRef(module, name, type_obj) >= 0) {
        *dest = (PyTypeObject*) type_obj;
        return 0;
    }

    Py_XDECREF(type_obj);
    return -1;
}
/* Set attribute `name` on `type` to the int `value`.
   Returns the result of PyObject_SetAttrString, or -1 if the int object
   could not be created. */
static inline int
add_constant_to_type(PyTypeObject *type, const char *name, long value)
{
    int status = -1;
    PyObject *boxed = PyLong_FromLong(value);

    if (boxed != NULL) {
        status = PyObject_SetAttrString((PyObject*) type, name, boxed);
        Py_DECREF(boxed);
    }
    return status;
}
/* Py_mod_exec slot: initialize the module state and populate the module
   namespace (constants, ZstdError, and the ZstdDict / ZstdCompressor /
   ZstdDecompressor types).

   Returns 0 on success, -1 with an exception set on failure. On failure
   the references already stored in the module state are NOT released
   here; they stay owned by the state and are dropped by _zstd_clear(),
   which the m_free hook (_zstd_free) calls. */
static int
_zstd_exec(PyObject *module)
{
    _zstd_state* const mod_state = get_zstd_state(module);

    /* Reusable objects & variables */
    mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
    if (mod_state->empty_bytes == NULL) {
        return -1;
    }

    /* Zero-length view: the pointer only needs to be non-NULL. */
    mod_state->empty_readonly_memoryview =
                PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ);
    if (mod_state->empty_readonly_memoryview == NULL) {
        return -1;
    }

    /* Add str to module state */
    ADD_STR_TO_STATE_MACRO(read);
    ADD_STR_TO_STATE_MACRO(readinto);
    ADD_STR_TO_STATE_MACRO(write);
    ADD_STR_TO_STATE_MACRO(flush);

    mod_state->CParameter_type = NULL;
    mod_state->DParameter_type = NULL;

    /* Add variables to module */
    if (add_vars_to_module(module) < 0) {
        return -1;
    }

    /* ZstdError */
    mod_state->ZstdError = PyErr_NewExceptionWithDoc(
                                  "_zstd.ZstdError",
                                  "Call to the underlying zstd library failed.",
                                  NULL, NULL);
    if (mod_state->ZstdError == NULL) {
        return -1;
    }

    if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) {
        /* Do not Py_DECREF here: mod_state->ZstdError would be left
           dangling and _zstd_clear() would release it a second time. */
        return -1;
    }

    /* ZstdDict */
    if (add_type_to_module(module,
                           "ZstdDict",
                           &zstddict_type_spec,
                           &mod_state->ZstdDict_type) < 0) {
        return -1;
    }

    // ZstdCompressor
    if (add_type_to_module(module,
                           "ZstdCompressor",
                           &zstdcompressor_type_spec,
                           &mod_state->ZstdCompressor_type) < 0) {
        return -1;
    }

    // Add EndDirective enum to ZstdCompressor
    if (add_constant_to_type(mod_state->ZstdCompressor_type,
                             "CONTINUE",
                             ZSTD_e_continue) < 0) {
        return -1;
    }

    if (add_constant_to_type(mod_state->ZstdCompressor_type,
                             "FLUSH_BLOCK",
                             ZSTD_e_flush) < 0) {
        return -1;
    }

    if (add_constant_to_type(mod_state->ZstdCompressor_type,
                             "FLUSH_FRAME",
                             ZSTD_e_end) < 0) {
        return -1;
    }

    // ZstdDecompressor
    if (add_type_to_module(module,
                           "ZstdDecompressor",
                           &ZstdDecompressor_type_spec,
                           &mod_state->ZstdDecompressor_type) < 0) {
        return -1;
    }

    return 0;
}
/* m_traverse hook of _zstdmodule: reports every object reference held in
   the module state to the garbage collector through `visit`.
   The list of fields mirrors the one in _zstd_clear(). */
static int
_zstd_traverse(PyObject *module, visitproc visit, void *arg)
{
_zstd_state* const mod_state = get_zstd_state(module);
/* Cached helper objects */
Py_VISIT(mod_state->empty_bytes);
Py_VISIT(mod_state->empty_readonly_memoryview);
Py_VISIT(mod_state->str_read);
Py_VISIT(mod_state->str_readinto);
Py_VISIT(mod_state->str_write);
Py_VISIT(mod_state->str_flush);
/* Types and exception created in _zstd_exec() */
Py_VISIT(mod_state->ZstdDict_type);
Py_VISIT(mod_state->ZstdCompressor_type);
Py_VISIT(mod_state->ZstdDecompressor_type);
Py_VISIT(mod_state->ZstdError);
/* Types registered through _set_parameter_types() */
Py_VISIT(mod_state->CParameter_type);
Py_VISIT(mod_state->DParameter_type);
return 0;
}
/* m_clear hook of _zstdmodule, also called from _zstd_free(): drops every
   object reference held in the module state. Always returns 0.
   The list of fields mirrors the one in _zstd_traverse(). */
static int
_zstd_clear(PyObject *module)
{
_zstd_state* const mod_state = get_zstd_state(module);
/* Cached helper objects */
Py_CLEAR(mod_state->empty_bytes);
Py_CLEAR(mod_state->empty_readonly_memoryview);
Py_CLEAR(mod_state->str_read);
Py_CLEAR(mod_state->str_readinto);
Py_CLEAR(mod_state->str_write);
Py_CLEAR(mod_state->str_flush);
/* Types and exception created in _zstd_exec() */
Py_CLEAR(mod_state->ZstdDict_type);
Py_CLEAR(mod_state->ZstdCompressor_type);
Py_CLEAR(mod_state->ZstdDecompressor_type);
Py_CLEAR(mod_state->ZstdError);
/* Types registered through _set_parameter_types() */
Py_CLEAR(mod_state->CParameter_type);
Py_CLEAR(mod_state->DParameter_type);
return 0;
}
/* m_free hook of _zstdmodule: releases the module state by delegating to
   _zstd_clear(). The return value is deliberately ignored. */
static void
_zstd_free(void *module)
{
(void)_zstd_clear((PyObject *)module);
}
/* Slot table referenced by _zstdmodule.m_slots; the {0} entry terminates it. */
static struct PyModuleDef_Slot _zstd_slots[] = {
/* Run _zstd_exec() to initialize the module. */
{Py_mod_exec, _zstd_exec},
/* Declare that the module does not rely on the GIL. */
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
{0}
};
/* Module definition for _zstd, passed to PyModuleDef_Init() in
   PyInit__zstd(). The module carries a _zstd_state of per-module state and
   wires up the slot table, method table and the traverse/clear/free hooks
   defined above. */
struct PyModuleDef _zstdmodule = {
PyModuleDef_HEAD_INIT,
.m_name = "_zstd",
.m_size = sizeof(_zstd_state),
.m_slots = _zstd_slots,
.m_methods = _zstd_methods,
.m_traverse = _zstd_traverse,
.m_clear = _zstd_clear,
.m_free = _zstd_free
};
/* Module entry point: returns the result of PyModuleDef_Init() on
   _zstdmodule; the module contents are filled in by the Py_mod_exec slot
   (_zstd_exec). */
PyMODINIT_FUNC
PyInit__zstd(void)
{
return PyModuleDef_Init(&_zstdmodule);
}