mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00

* Add _zstd module for https://peps.python.org/pep-0784/ This commit introduces the `_zstd` module, with bindings to libzstd from the pyzstd project. It also includes the unix build system configuration. Windows build system support will be integrated independently as it depends on integration with cpython-source-deps. * Add _zstd to modules * Fix path for compression.zstd module * Ignore _zstd module like _io * Expand module state macros to improve code quality Also removes module state references from the classes in the _zstd module and instead uses PyType_GetModuleState() * Remove backticks suggested in review Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> * Use critical sections to lock object state This should avoid races and deadlocks. * Remove compress/decompress and mark module as not reliant on the GIL The `compress`/`decompress` functions will be moved to Python code for simplicity. C implementations can always be re-added in the future. Also, mark _zstd as not requiring the GIL. * Lift critical section to avoid clang warning * Respond to comments by picnixz * Call out pyzstd explicitly in license description Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> * Use a much more robust implementation... ... 
for `get_zstd_state_from_type` Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Use PyList_GetItemRef for thread safety purposes * Use a macro for the minimum supported version * remove const from primitive types * Use PyMem_New in another spot * Simplify error handling in _get_frame_size * Another simplification of error handling in get_frame_info * Rename _module_state to mod_state * Rewrite comment explaining the context of the code * Add link to pyzstd * Add TODO about refactoring dict training code * Use PyModule_AddObjectRef over PyModule_AddObject PyModule_AddObject is soft-deprecated, so we should use PyModule_AddObjectRef * Check result of OutputBufferGrow * Simplify return logic in `add_constant_to_type` Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Ignore return value of _zstd_clear() Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * Remove redundant comments * Remove __reduce__ from ZstdDict We should instead document that to pickle a dictionary a user should use the `.dict_content` attribute. 
* Use PyUnicode_FromFormat instead of a buffer * Don't use C constants/types in error messages * Make error messages easier to understand for Python users * Lower minimum required version 1.4.0 * Use casts and make slot function signatures correct * Be consistent with CPython on const usage * Make else clauses in line with PEP 7 * Fix over-indented blocks in argument clinic * Add critical section around ZSTD_DCtx_setParameter * Add a TODO about refactoring critical sections * Use Py_UNREACHABLE * Move bytes operations out of Py_BEGIN_ALLOW_THREADS * Add TODO about ensuring a lock is held * Remove asserts that may not be correct * Add TODO to make ZstdDict and others GC objects * Make objects GC tracked * Remove unused include * Fix some memory issues * Fix refleaks on module and in ZstdDict * Update configure to check for ZDICT_finalizeDictionary * Properly check version in configure * exit(1) if check fails * Use AC_RUN_IFELSE * Use a define() to re-use version check * Actually properly set _zstd module status based on version --------- Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
432 lines
13 KiB
C
Generated
432 lines
13 KiB
C
Generated
/*[clinic input]
|
|
preserve
|
|
[clinic start generated code]*/
|
|
|
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
|
# include "pycore_gc.h" // PyGC_Head
|
|
# include "pycore_runtime.h" // _Py_ID()
|
|
#endif
|
|
#include "pycore_abstract.h" // _PyNumber_Index()
|
|
#include "pycore_modsupport.h" // _PyArg_CheckPositional()
|
|
|
|
PyDoc_STRVAR(_zstd__train_dict__doc__,
|
|
"_train_dict($module, samples_bytes, samples_size_list, dict_size, /)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Internal function, train a zstd dictionary on sample data.\n"
|
|
"\n"
|
|
" samples_bytes\n"
|
|
" Concatenation of samples.\n"
|
|
" samples_size_list\n"
|
|
" List of samples\' sizes.\n"
|
|
" dict_size\n"
|
|
" The size of the dictionary.");
|
|
|
|
#define _ZSTD__TRAIN_DICT_METHODDEF \
|
|
{"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__},
|
|
|
|
static PyObject *
|
|
_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
|
|
PyObject *samples_size_list, Py_ssize_t dict_size);
|
|
|
|
static PyObject *
|
|
_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
PyBytesObject *samples_bytes;
|
|
PyObject *samples_size_list;
|
|
Py_ssize_t dict_size;
|
|
|
|
if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) {
|
|
goto exit;
|
|
}
|
|
if (!PyBytes_Check(args[0])) {
|
|
_PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]);
|
|
goto exit;
|
|
}
|
|
samples_bytes = (PyBytesObject *)args[0];
|
|
if (!PyList_Check(args[1])) {
|
|
_PyArg_BadArgument("_train_dict", "argument 2", "list", args[1]);
|
|
goto exit;
|
|
}
|
|
samples_size_list = args[1];
|
|
{
|
|
Py_ssize_t ival = -1;
|
|
PyObject *iobj = _PyNumber_Index(args[2]);
|
|
if (iobj != NULL) {
|
|
ival = PyLong_AsSsize_t(iobj);
|
|
Py_DECREF(iobj);
|
|
}
|
|
if (ival == -1 && PyErr_Occurred()) {
|
|
goto exit;
|
|
}
|
|
dict_size = ival;
|
|
}
|
|
return_value = _zstd__train_dict_impl(module, samples_bytes, samples_size_list, dict_size);
|
|
|
|
exit:
|
|
return return_value;
|
|
}
|
|
|
|
PyDoc_STRVAR(_zstd__finalize_dict__doc__,
|
|
"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n"
|
|
" samples_size_list, dict_size, compression_level, /)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Internal function, finalize a zstd dictionary.\n"
|
|
"\n"
|
|
" custom_dict_bytes\n"
|
|
" Custom dictionary content.\n"
|
|
" samples_bytes\n"
|
|
" Concatenation of samples.\n"
|
|
" samples_size_list\n"
|
|
" List of samples\' sizes.\n"
|
|
" dict_size\n"
|
|
" The size of the dictionary.\n"
|
|
" compression_level\n"
|
|
" Optimize for a specific zstd compression level, 0 means default.");
|
|
|
|
#define _ZSTD__FINALIZE_DICT_METHODDEF \
|
|
{"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__},
|
|
|
|
static PyObject *
|
|
_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
|
|
PyBytesObject *samples_bytes,
|
|
PyObject *samples_size_list, Py_ssize_t dict_size,
|
|
int compression_level);
|
|
|
|
static PyObject *
|
|
_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
PyBytesObject *custom_dict_bytes;
|
|
PyBytesObject *samples_bytes;
|
|
PyObject *samples_size_list;
|
|
Py_ssize_t dict_size;
|
|
int compression_level;
|
|
|
|
if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) {
|
|
goto exit;
|
|
}
|
|
if (!PyBytes_Check(args[0])) {
|
|
_PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]);
|
|
goto exit;
|
|
}
|
|
custom_dict_bytes = (PyBytesObject *)args[0];
|
|
if (!PyBytes_Check(args[1])) {
|
|
_PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]);
|
|
goto exit;
|
|
}
|
|
samples_bytes = (PyBytesObject *)args[1];
|
|
if (!PyList_Check(args[2])) {
|
|
_PyArg_BadArgument("_finalize_dict", "argument 3", "list", args[2]);
|
|
goto exit;
|
|
}
|
|
samples_size_list = args[2];
|
|
{
|
|
Py_ssize_t ival = -1;
|
|
PyObject *iobj = _PyNumber_Index(args[3]);
|
|
if (iobj != NULL) {
|
|
ival = PyLong_AsSsize_t(iobj);
|
|
Py_DECREF(iobj);
|
|
}
|
|
if (ival == -1 && PyErr_Occurred()) {
|
|
goto exit;
|
|
}
|
|
dict_size = ival;
|
|
}
|
|
compression_level = PyLong_AsInt(args[4]);
|
|
if (compression_level == -1 && PyErr_Occurred()) {
|
|
goto exit;
|
|
}
|
|
return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_size_list, dict_size, compression_level);
|
|
|
|
exit:
|
|
return return_value;
|
|
}
|
|
|
|
PyDoc_STRVAR(_zstd__get_param_bounds__doc__,
|
|
"_get_param_bounds($module, /, is_compress, parameter)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Internal function, get CParameter/DParameter bounds.\n"
|
|
"\n"
|
|
" is_compress\n"
|
|
" True for CParameter, False for DParameter.\n"
|
|
" parameter\n"
|
|
" The parameter to get bounds.");
|
|
|
|
#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \
|
|
{"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__},
|
|
|
|
static PyObject *
|
|
_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
|
|
int parameter);
|
|
|
|
static PyObject *
|
|
_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
|
|
|
#define NUM_KEYWORDS 2
|
|
static struct {
|
|
PyGC_Head _this_is_not_used;
|
|
PyObject_VAR_HEAD
|
|
Py_hash_t ob_hash;
|
|
PyObject *ob_item[NUM_KEYWORDS];
|
|
} _kwtuple = {
|
|
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
|
.ob_hash = -1,
|
|
.ob_item = { &_Py_ID(is_compress), &_Py_ID(parameter), },
|
|
};
|
|
#undef NUM_KEYWORDS
|
|
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
|
|
|
#else // !Py_BUILD_CORE
|
|
# define KWTUPLE NULL
|
|
#endif // !Py_BUILD_CORE
|
|
|
|
static const char * const _keywords[] = {"is_compress", "parameter", NULL};
|
|
static _PyArg_Parser _parser = {
|
|
.keywords = _keywords,
|
|
.fname = "_get_param_bounds",
|
|
.kwtuple = KWTUPLE,
|
|
};
|
|
#undef KWTUPLE
|
|
PyObject *argsbuf[2];
|
|
int is_compress;
|
|
int parameter;
|
|
|
|
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
|
|
/*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
|
|
if (!args) {
|
|
goto exit;
|
|
}
|
|
is_compress = PyObject_IsTrue(args[0]);
|
|
if (is_compress < 0) {
|
|
goto exit;
|
|
}
|
|
parameter = PyLong_AsInt(args[1]);
|
|
if (parameter == -1 && PyErr_Occurred()) {
|
|
goto exit;
|
|
}
|
|
return_value = _zstd__get_param_bounds_impl(module, is_compress, parameter);
|
|
|
|
exit:
|
|
return return_value;
|
|
}
|
|
|
|
PyDoc_STRVAR(_zstd_get_frame_size__doc__,
|
|
"get_frame_size($module, /, frame_buffer)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n"
|
|
"\n"
|
|
" frame_buffer\n"
|
|
" A bytes-like object, it should start from the beginning of a frame,\n"
|
|
" and contains at least one complete frame.\n"
|
|
"\n"
|
|
"It will iterate all blocks\' headers within a frame, to accumulate the frame size.");
|
|
|
|
#define _ZSTD_GET_FRAME_SIZE_METHODDEF \
|
|
{"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__},
|
|
|
|
static PyObject *
|
|
_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer);
|
|
|
|
static PyObject *
|
|
_zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
|
|
|
#define NUM_KEYWORDS 1
|
|
static struct {
|
|
PyGC_Head _this_is_not_used;
|
|
PyObject_VAR_HEAD
|
|
Py_hash_t ob_hash;
|
|
PyObject *ob_item[NUM_KEYWORDS];
|
|
} _kwtuple = {
|
|
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
|
.ob_hash = -1,
|
|
.ob_item = { &_Py_ID(frame_buffer), },
|
|
};
|
|
#undef NUM_KEYWORDS
|
|
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
|
|
|
#else // !Py_BUILD_CORE
|
|
# define KWTUPLE NULL
|
|
#endif // !Py_BUILD_CORE
|
|
|
|
static const char * const _keywords[] = {"frame_buffer", NULL};
|
|
static _PyArg_Parser _parser = {
|
|
.keywords = _keywords,
|
|
.fname = "get_frame_size",
|
|
.kwtuple = KWTUPLE,
|
|
};
|
|
#undef KWTUPLE
|
|
PyObject *argsbuf[1];
|
|
Py_buffer frame_buffer = {NULL, NULL};
|
|
|
|
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
|
|
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
|
|
if (!args) {
|
|
goto exit;
|
|
}
|
|
if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
|
|
goto exit;
|
|
}
|
|
return_value = _zstd_get_frame_size_impl(module, &frame_buffer);
|
|
|
|
exit:
|
|
/* Cleanup for frame_buffer */
|
|
if (frame_buffer.obj) {
|
|
PyBuffer_Release(&frame_buffer);
|
|
}
|
|
|
|
return return_value;
|
|
}
|
|
|
|
PyDoc_STRVAR(_zstd__get_frame_info__doc__,
|
|
"_get_frame_info($module, /, frame_buffer)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Internal function, get zstd frame infomation from a frame header.\n"
|
|
"\n"
|
|
" frame_buffer\n"
|
|
" A bytes-like object, containing the header of a zstd frame.");
|
|
|
|
#define _ZSTD__GET_FRAME_INFO_METHODDEF \
|
|
{"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__},
|
|
|
|
static PyObject *
|
|
_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer);
|
|
|
|
static PyObject *
|
|
_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
|
|
|
#define NUM_KEYWORDS 1
|
|
static struct {
|
|
PyGC_Head _this_is_not_used;
|
|
PyObject_VAR_HEAD
|
|
Py_hash_t ob_hash;
|
|
PyObject *ob_item[NUM_KEYWORDS];
|
|
} _kwtuple = {
|
|
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
|
.ob_hash = -1,
|
|
.ob_item = { &_Py_ID(frame_buffer), },
|
|
};
|
|
#undef NUM_KEYWORDS
|
|
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
|
|
|
#else // !Py_BUILD_CORE
|
|
# define KWTUPLE NULL
|
|
#endif // !Py_BUILD_CORE
|
|
|
|
static const char * const _keywords[] = {"frame_buffer", NULL};
|
|
static _PyArg_Parser _parser = {
|
|
.keywords = _keywords,
|
|
.fname = "_get_frame_info",
|
|
.kwtuple = KWTUPLE,
|
|
};
|
|
#undef KWTUPLE
|
|
PyObject *argsbuf[1];
|
|
Py_buffer frame_buffer = {NULL, NULL};
|
|
|
|
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
|
|
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
|
|
if (!args) {
|
|
goto exit;
|
|
}
|
|
if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
|
|
goto exit;
|
|
}
|
|
return_value = _zstd__get_frame_info_impl(module, &frame_buffer);
|
|
|
|
exit:
|
|
/* Cleanup for frame_buffer */
|
|
if (frame_buffer.obj) {
|
|
PyBuffer_Release(&frame_buffer);
|
|
}
|
|
|
|
return return_value;
|
|
}
|
|
|
|
PyDoc_STRVAR(_zstd__set_parameter_types__doc__,
|
|
"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n"
|
|
"--\n"
|
|
"\n"
|
|
"Internal function, set CParameter/DParameter types for validity check.\n"
|
|
"\n"
|
|
" c_parameter_type\n"
|
|
" CParameter IntEnum type object\n"
|
|
" d_parameter_type\n"
|
|
" DParameter IntEnum type object");
|
|
|
|
#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \
|
|
{"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__},
|
|
|
|
static PyObject *
|
|
_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
|
|
PyObject *d_parameter_type);
|
|
|
|
static PyObject *
|
|
_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
|
{
|
|
PyObject *return_value = NULL;
|
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
|
|
|
#define NUM_KEYWORDS 2
|
|
static struct {
|
|
PyGC_Head _this_is_not_used;
|
|
PyObject_VAR_HEAD
|
|
Py_hash_t ob_hash;
|
|
PyObject *ob_item[NUM_KEYWORDS];
|
|
} _kwtuple = {
|
|
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
|
.ob_hash = -1,
|
|
.ob_item = { &_Py_ID(c_parameter_type), &_Py_ID(d_parameter_type), },
|
|
};
|
|
#undef NUM_KEYWORDS
|
|
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
|
|
|
#else // !Py_BUILD_CORE
|
|
# define KWTUPLE NULL
|
|
#endif // !Py_BUILD_CORE
|
|
|
|
static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL};
|
|
static _PyArg_Parser _parser = {
|
|
.keywords = _keywords,
|
|
.fname = "_set_parameter_types",
|
|
.kwtuple = KWTUPLE,
|
|
};
|
|
#undef KWTUPLE
|
|
PyObject *argsbuf[2];
|
|
PyObject *c_parameter_type;
|
|
PyObject *d_parameter_type;
|
|
|
|
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
|
|
/*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
|
|
if (!args) {
|
|
goto exit;
|
|
}
|
|
if (!PyObject_TypeCheck(args[0], &PyType_Type)) {
|
|
_PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]);
|
|
goto exit;
|
|
}
|
|
c_parameter_type = args[0];
|
|
if (!PyObject_TypeCheck(args[1], &PyType_Type)) {
|
|
_PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]);
|
|
goto exit;
|
|
}
|
|
d_parameter_type = args[1];
|
|
return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type);
|
|
|
|
exit:
|
|
return return_value;
|
|
}
|
|
/*[clinic end generated code: output=077c8ea2b11fb188 input=a9049054013a1b77]*/
|