gh-132983: Add the compression.zstd package and tests (#133365)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Co-authored-by: Rogdham <contact@rogdham.net>
This commit is contained in:
Emma Smith 2025-05-05 17:38:08 -07:00 committed by GitHub
parent 793402e217
commit c273f59fb3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 3358 additions and 100 deletions

View file

@ -74,33 +74,33 @@ typedef struct {
static const ParameterInfo cp_list[] =
{
{ZSTD_c_compressionLevel, "compressionLevel"},
{ZSTD_c_windowLog, "windowLog"},
{ZSTD_c_hashLog, "hashLog"},
{ZSTD_c_chainLog, "chainLog"},
{ZSTD_c_searchLog, "searchLog"},
{ZSTD_c_minMatch, "minMatch"},
{ZSTD_c_targetLength, "targetLength"},
{ZSTD_c_compressionLevel, "compression_level"},
{ZSTD_c_windowLog, "window_log"},
{ZSTD_c_hashLog, "hash_log"},
{ZSTD_c_chainLog, "chain_log"},
{ZSTD_c_searchLog, "search_log"},
{ZSTD_c_minMatch, "min_match"},
{ZSTD_c_targetLength, "target_length"},
{ZSTD_c_strategy, "strategy"},
{ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"},
{ZSTD_c_ldmHashLog, "ldmHashLog"},
{ZSTD_c_ldmMinMatch, "ldmMinMatch"},
{ZSTD_c_ldmBucketSizeLog, "ldmBucketSizeLog"},
{ZSTD_c_ldmHashRateLog, "ldmHashRateLog"},
{ZSTD_c_enableLongDistanceMatching, "enable_long_distance_matching"},
{ZSTD_c_ldmHashLog, "ldm_hash_log"},
{ZSTD_c_ldmMinMatch, "ldm_min_match"},
{ZSTD_c_ldmBucketSizeLog, "ldm_bucket_size_log"},
{ZSTD_c_ldmHashRateLog, "ldm_hash_rate_log"},
{ZSTD_c_contentSizeFlag, "contentSizeFlag"},
{ZSTD_c_checksumFlag, "checksumFlag"},
{ZSTD_c_dictIDFlag, "dictIDFlag"},
{ZSTD_c_contentSizeFlag, "content_size_flag"},
{ZSTD_c_checksumFlag, "checksum_flag"},
{ZSTD_c_dictIDFlag, "dict_id_flag"},
{ZSTD_c_nbWorkers, "nbWorkers"},
{ZSTD_c_jobSize, "jobSize"},
{ZSTD_c_overlapLog, "overlapLog"}
{ZSTD_c_nbWorkers, "nb_workers"},
{ZSTD_c_jobSize, "job_size"},
{ZSTD_c_overlapLog, "overlap_log"}
};
static const ParameterInfo dp_list[] =
{
{ZSTD_d_windowLogMax, "windowLogMax"}
{ZSTD_d_windowLogMax, "window_log_max"}
};
void
@ -180,8 +180,8 @@ _zstd._train_dict
samples_bytes: PyBytesObject
Concatenation of samples.
samples_size_list: object(subclass_of='&PyList_Type')
List of samples' sizes.
samples_sizes: object(subclass_of='&PyTuple_Type')
Tuple of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
/
@ -191,8 +191,8 @@ Internal function, train a zstd dictionary on sample data.
static PyObject *
_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
PyObject *samples_size_list, Py_ssize_t dict_size)
/*[clinic end generated code: output=ee53c34c8f77886b input=b21d092c695a3a81]*/
PyObject *samples_sizes, Py_ssize_t dict_size)
/*[clinic end generated code: output=b5b4f36347c0addd input=2dce5b57d63923e2]*/
{
// TODO(emmatyping): The preamble and suffix to this function and _finalize_dict
// are pretty similar. We should see if we can refactor them to share that code.
@ -209,7 +209,7 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
return NULL;
}
chunks_number = Py_SIZE(samples_size_list);
chunks_number = Py_SIZE(samples_sizes);
if ((size_t) chunks_number > UINT32_MAX) {
PyErr_Format(PyExc_ValueError,
"The number of samples should be <= %u.", UINT32_MAX);
@ -225,12 +225,11 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
sizes_sum = 0;
for (i = 0; i < chunks_number; i++) {
PyObject *size = PyList_GetItemRef(samples_size_list, i);
PyObject *size = PyTuple_GetItem(samples_sizes, i);
chunk_sizes[i] = PyLong_AsSize_t(size);
Py_DECREF(size);
if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
PyErr_Format(PyExc_ValueError,
"Items in samples_size_list should be an int "
"Items in samples_sizes should be an int "
"object, with a value between 0 and %u.", SIZE_MAX);
goto error;
}
@ -239,7 +238,7 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
if (sizes_sum != Py_SIZE(samples_bytes)) {
PyErr_SetString(PyExc_ValueError,
"The samples size list doesn't match the concatenation's size.");
"The samples size tuple doesn't match the concatenation's size.");
goto error;
}
@ -287,8 +286,8 @@ _zstd._finalize_dict
Custom dictionary content.
samples_bytes: PyBytesObject
Concatenation of samples.
samples_size_list: object(subclass_of='&PyList_Type')
List of samples' sizes.
samples_sizes: object(subclass_of='&PyTuple_Type')
Tuple of samples' sizes.
dict_size: Py_ssize_t
The size of the dictionary.
compression_level: int
@ -301,9 +300,9 @@ Internal function, finalize a zstd dictionary.
static PyObject *
_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
PyBytesObject *samples_bytes,
PyObject *samples_size_list, Py_ssize_t dict_size,
PyObject *samples_sizes, Py_ssize_t dict_size,
int compression_level)
/*[clinic end generated code: output=9c2a7d8c845cee93 input=08531a803d87c56f]*/
/*[clinic end generated code: output=5dc5b520fddba37f input=8afd42a249078460]*/
{
Py_ssize_t chunks_number;
size_t *chunk_sizes = NULL;
@ -319,7 +318,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
return NULL;
}
chunks_number = Py_SIZE(samples_size_list);
chunks_number = Py_SIZE(samples_sizes);
if ((size_t) chunks_number > UINT32_MAX) {
PyErr_Format(PyExc_ValueError,
"The number of samples should be <= %u.", UINT32_MAX);
@ -335,11 +334,11 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
sizes_sum = 0;
for (i = 0; i < chunks_number; i++) {
PyObject *size = PyList_GET_ITEM(samples_size_list, i);
PyObject *size = PyTuple_GetItem(samples_sizes, i);
chunk_sizes[i] = PyLong_AsSize_t(size);
if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
PyErr_Format(PyExc_ValueError,
"Items in samples_size_list should be an int "
"Items in samples_sizes should be an int "
"object, with a value between 0 and %u.", SIZE_MAX);
goto error;
}
@ -348,7 +347,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
if (sizes_sum != Py_SIZE(samples_bytes)) {
PyErr_SetString(PyExc_ValueError,
"The samples size list doesn't match the concatenation's size.");
"The samples size tuple doesn't match the concatenation's size.");
goto error;
}
@ -402,18 +401,18 @@ success:
/*[clinic input]
_zstd._get_param_bounds
is_compress: bool
True for CParameter, False for DParameter.
parameter: int
The parameter to get bounds.
is_compress: bool
True for CompressionParameter, False for DecompressionParameter.
Internal function, get CParameter/DParameter bounds.
Internal function, get CompressionParameter/DecompressionParameter bounds.
[clinic start generated code]*/
static PyObject *
_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
int parameter)
/*[clinic end generated code: output=b751dc710f89ef55 input=fb21ff96aff65df1]*/
_zstd__get_param_bounds_impl(PyObject *module, int parameter,
int is_compress)
/*[clinic end generated code: output=9892cd822f937e79 input=884cd1a01125267d]*/
{
ZSTD_bounds bound;
if (is_compress) {
@ -515,30 +514,30 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
_zstd._set_parameter_types
c_parameter_type: object(subclass_of='&PyType_Type')
CParameter IntEnum type object
CompressionParameter IntEnum type object
d_parameter_type: object(subclass_of='&PyType_Type')
DParameter IntEnum type object
DecompressionParameter IntEnum type object
Internal function, set CParameter/DParameter types for validity check.
Internal function, set CompressionParameter/DecompressionParameter types for validity check.
[clinic start generated code]*/
static PyObject *
_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
PyObject *d_parameter_type)
/*[clinic end generated code: output=a13d4890ccbd2873 input=3e7d0d37c3a1045a]*/
/*[clinic end generated code: output=a13d4890ccbd2873 input=4535545d903853d3]*/
{
_zstd_state* const mod_state = get_zstd_state(module);
if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) {
PyErr_SetString(PyExc_ValueError,
"The two arguments should be CParameter and "
"DParameter types.");
"The two arguments should be CompressionParameter and "
"DecompressionParameter types.");
return NULL;
}
Py_XDECREF(mod_state->CParameter_type);
Py_INCREF(c_parameter_type);
mod_state->CParameter_type = (PyTypeObject*) c_parameter_type;
mod_state->CParameter_type = (PyTypeObject*)c_parameter_type;
Py_XDECREF(mod_state->DParameter_type);
Py_INCREF(d_parameter_type);

View file

@ -10,15 +10,15 @@ preserve
#include "pycore_modsupport.h" // _PyArg_CheckPositional()
PyDoc_STRVAR(_zstd__train_dict__doc__,
"_train_dict($module, samples_bytes, samples_size_list, dict_size, /)\n"
"_train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n"
"--\n"
"\n"
"Internal function, train a zstd dictionary on sample data.\n"
"\n"
" samples_bytes\n"
" Concatenation of samples.\n"
" samples_size_list\n"
" List of samples\' sizes.\n"
" samples_sizes\n"
" Tuple of samples\' sizes.\n"
" dict_size\n"
" The size of the dictionary.");
@ -27,14 +27,14 @@ PyDoc_STRVAR(_zstd__train_dict__doc__,
static PyObject *
_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
PyObject *samples_size_list, Py_ssize_t dict_size);
PyObject *samples_sizes, Py_ssize_t dict_size);
static PyObject *
_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyBytesObject *samples_bytes;
PyObject *samples_size_list;
PyObject *samples_sizes;
Py_ssize_t dict_size;
if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) {
@ -45,11 +45,11 @@ _zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
goto exit;
}
samples_bytes = (PyBytesObject *)args[0];
if (!PyList_Check(args[1])) {
_PyArg_BadArgument("_train_dict", "argument 2", "list", args[1]);
if (!PyTuple_Check(args[1])) {
_PyArg_BadArgument("_train_dict", "argument 2", "tuple", args[1]);
goto exit;
}
samples_size_list = args[1];
samples_sizes = args[1];
{
Py_ssize_t ival = -1;
PyObject *iobj = _PyNumber_Index(args[2]);
@ -62,7 +62,7 @@ _zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
}
dict_size = ival;
}
return_value = _zstd__train_dict_impl(module, samples_bytes, samples_size_list, dict_size);
return_value = _zstd__train_dict_impl(module, samples_bytes, samples_sizes, dict_size);
exit:
return return_value;
@ -70,7 +70,7 @@ exit:
PyDoc_STRVAR(_zstd__finalize_dict__doc__,
"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n"
" samples_size_list, dict_size, compression_level, /)\n"
" samples_sizes, dict_size, compression_level, /)\n"
"--\n"
"\n"
"Internal function, finalize a zstd dictionary.\n"
@ -79,8 +79,8 @@ PyDoc_STRVAR(_zstd__finalize_dict__doc__,
" Custom dictionary content.\n"
" samples_bytes\n"
" Concatenation of samples.\n"
" samples_size_list\n"
" List of samples\' sizes.\n"
" samples_sizes\n"
" Tuple of samples\' sizes.\n"
" dict_size\n"
" The size of the dictionary.\n"
" compression_level\n"
@ -92,7 +92,7 @@ PyDoc_STRVAR(_zstd__finalize_dict__doc__,
static PyObject *
_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
PyBytesObject *samples_bytes,
PyObject *samples_size_list, Py_ssize_t dict_size,
PyObject *samples_sizes, Py_ssize_t dict_size,
int compression_level);
static PyObject *
@ -101,7 +101,7 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
PyObject *return_value = NULL;
PyBytesObject *custom_dict_bytes;
PyBytesObject *samples_bytes;
PyObject *samples_size_list;
PyObject *samples_sizes;
Py_ssize_t dict_size;
int compression_level;
@ -118,11 +118,11 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
goto exit;
}
samples_bytes = (PyBytesObject *)args[1];
if (!PyList_Check(args[2])) {
_PyArg_BadArgument("_finalize_dict", "argument 3", "list", args[2]);
if (!PyTuple_Check(args[2])) {
_PyArg_BadArgument("_finalize_dict", "argument 3", "tuple", args[2]);
goto exit;
}
samples_size_list = args[2];
samples_sizes = args[2];
{
Py_ssize_t ival = -1;
PyObject *iobj = _PyNumber_Index(args[3]);
@ -139,29 +139,29 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
if (compression_level == -1 && PyErr_Occurred()) {
goto exit;
}
return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_size_list, dict_size, compression_level);
return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level);
exit:
return return_value;
}
PyDoc_STRVAR(_zstd__get_param_bounds__doc__,
"_get_param_bounds($module, /, is_compress, parameter)\n"
"_get_param_bounds($module, /, parameter, is_compress)\n"
"--\n"
"\n"
"Internal function, get CParameter/DParameter bounds.\n"
"Internal function, get CompressionParameter/DecompressionParameter bounds.\n"
"\n"
" is_compress\n"
" True for CParameter, False for DParameter.\n"
" parameter\n"
" The parameter to get bounds.");
" The parameter to get bounds.\n"
" is_compress\n"
" True for CompressionParameter, False for DecompressionParameter.");
#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \
{"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__},
static PyObject *
_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
int parameter);
_zstd__get_param_bounds_impl(PyObject *module, int parameter,
int is_compress);
static PyObject *
_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@ -178,7 +178,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
.ob_item = { &_Py_ID(is_compress), &_Py_ID(parameter), },
.ob_item = { &_Py_ID(parameter), &_Py_ID(is_compress), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
@ -187,7 +187,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"is_compress", "parameter", NULL};
static const char * const _keywords[] = {"parameter", "is_compress", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "_get_param_bounds",
@ -195,23 +195,23 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg
};
#undef KWTUPLE
PyObject *argsbuf[2];
int is_compress;
int parameter;
int is_compress;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
/*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
if (!args) {
goto exit;
}
is_compress = PyObject_IsTrue(args[0]);
if (is_compress < 0) {
goto exit;
}
parameter = PyLong_AsInt(args[1]);
parameter = PyLong_AsInt(args[0]);
if (parameter == -1 && PyErr_Occurred()) {
goto exit;
}
return_value = _zstd__get_param_bounds_impl(module, is_compress, parameter);
is_compress = PyObject_IsTrue(args[1]);
if (is_compress < 0) {
goto exit;
}
return_value = _zstd__get_param_bounds_impl(module, parameter, is_compress);
exit:
return return_value;
@ -360,12 +360,12 @@ PyDoc_STRVAR(_zstd__set_parameter_types__doc__,
"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n"
"--\n"
"\n"
"Internal function, set CParameter/DParameter types for validity check.\n"
"Internal function, set CompressionParameter/DecompressionParameter types for validity check.\n"
"\n"
" c_parameter_type\n"
" CParameter IntEnum type object\n"
" CompressionParameter IntEnum type object\n"
" d_parameter_type\n"
" DParameter IntEnum type object");
" DecompressionParameter IntEnum type object");
#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \
{"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__},
@ -429,4 +429,4 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n
exit:
return return_value;
}
/*[clinic end generated code: output=077c8ea2b11fb188 input=a9049054013a1b77]*/
/*[clinic end generated code: output=189c462236a7096c input=a9049054013a1b77]*/

View file

@ -71,14 +71,14 @@ _PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
if (Py_TYPE(key) == mod_state->DParameter_type) {
PyErr_SetString(PyExc_TypeError,
"Key of compression option dict should "
"NOT be DParameter.");
"NOT be DecompressionParameter.");
return -1;
}
int key_v = PyLong_AsInt(key);
if (key_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Key of options dict should be a CParameter attribute.");
"Key of options dict should be a CompressionParameter attribute.");
return -1;
}

View file

@ -84,7 +84,7 @@ _PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
if (Py_TYPE(key) == mod_state->CParameter_type) {
PyErr_SetString(PyExc_TypeError,
"Key of decompression options dict should "
"NOT be CParameter.");
"NOT be CompressionParameter.");
return -1;
}
@ -92,7 +92,7 @@ _PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
int key_v = PyLong_AsInt(key);
if (key_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Key of options dict should be a DParameter attribute.");
"Key of options dict should be a DecompressionParameter attribute.");
return -1;
}