gh-132983: Split `_zstd_set_c_parameters` (#133921)

This commit is contained in:
Adam Turner 2025-05-28 15:45:08 +01:00 committed by GitHub
parent 0d499c7e93
commit 11f7a939de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 233 additions and 162 deletions

View file

@ -1,7 +1,6 @@
import io
from os import PathLike
from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdError,
ZSTD_DStreamOutSize)
from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize
from compression._common import _streams
__all__ = ('ZstdFile', 'open')

View file

@ -64,6 +64,10 @@ TRAINED_DICT = None
SUPPORT_MULTITHREADING = False
C_INT_MIN = -(2**31)
C_INT_MAX = (2**31) - 1
def setUpModule():
global SUPPORT_MULTITHREADING
SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0)
@ -195,14 +199,21 @@ class CompressorTestCase(unittest.TestCase):
self.assertRaises(TypeError, ZstdCompressor, zstd_dict=b"abcd1234")
self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4})
with self.assertRaises(ValueError):
ZstdCompressor(2**31)
with self.assertRaises(ValueError):
ZstdCompressor(options={2**31: 100})
# valid range for compression level is [-(1<<17), 22]
msg = r'illegal compression level {}; the valid range is \[-?\d+, -?\d+\]'
with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)):
ZstdCompressor(C_INT_MAX)
with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)):
ZstdCompressor(C_INT_MIN)
msg = r'illegal compression level; the valid range is \[-?\d+, -?\d+\]'
with self.assertRaisesRegex(ValueError, msg):
ZstdCompressor(level=-(2**1000))
with self.assertRaisesRegex(ValueError, msg):
ZstdCompressor(level=2**1000)
with self.assertRaises(ZstdError):
with self.assertRaises(ValueError):
ZstdCompressor(options={CompressionParameter.window_log: 100})
with self.assertRaises(ZstdError):
with self.assertRaises(ValueError):
ZstdCompressor(options={3333: 100})
# Method bad arguments
@ -253,18 +264,32 @@ class CompressorTestCase(unittest.TestCase):
}
ZstdCompressor(options=d)
# larger than signed int, ValueError
d1 = d.copy()
d1[CompressionParameter.ldm_bucket_size_log] = 2**31
self.assertRaises(ValueError, ZstdCompressor, options=d1)
# larger than signed int
d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MAX
with self.assertRaises(ValueError):
ZstdCompressor(options=d1)
# smaller than signed int
d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MIN
with self.assertRaises(ValueError):
ZstdCompressor(options=d1)
# clamp compressionLevel
# out of bounds compression level
level_min, level_max = CompressionParameter.compression_level.bounds()
compress(b'', level_max+1)
compress(b'', level_min-1)
compress(b'', options={CompressionParameter.compression_level:level_max+1})
compress(b'', options={CompressionParameter.compression_level:level_min-1})
with self.assertRaises(ValueError):
compress(b'', level_max+1)
with self.assertRaises(ValueError):
compress(b'', level_min-1)
with self.assertRaises(ValueError):
compress(b'', 2**1000)
with self.assertRaises(ValueError):
compress(b'', -(2**1000))
with self.assertRaises(ValueError):
compress(b'', options={
CompressionParameter.compression_level: level_max+1})
with self.assertRaises(ValueError):
compress(b'', options={
CompressionParameter.compression_level: level_min-1})
# zstd lib doesn't support MT compression
if not SUPPORT_MULTITHREADING:
@ -277,19 +302,19 @@ class CompressorTestCase(unittest.TestCase):
# out of bounds error msg
option = {CompressionParameter.window_log:100}
with self.assertRaisesRegex(ZstdError,
(r'Error when setting zstd compression parameter "window_log", '
r'it should \d+ <= value <= \d+, provided value is 100\. '
r'\((?:32|64)-bit build\)')):
with self.assertRaisesRegex(
ValueError,
"compression parameter 'window_log' received an illegal value 100; "
r'the valid range is \[-?\d+, -?\d+\]',
):
compress(b'', options=option)
def test_unknown_compression_parameter(self):
KEY = 100001234
option = {CompressionParameter.compression_level: 10,
KEY: 200000000}
pattern = (r'Invalid zstd compression parameter.*?'
fr'"unknown parameter \(key {KEY}\)"')
with self.assertRaisesRegex(ZstdError, pattern):
pattern = rf"invalid compression parameter 'unknown parameter \(key {KEY}\)'"
with self.assertRaisesRegex(ValueError, pattern):
ZstdCompressor(options=option)
@unittest.skipIf(not SUPPORT_MULTITHREADING,
@ -384,12 +409,22 @@ class DecompressorTestCase(unittest.TestCase):
self.assertRaises(TypeError, ZstdDecompressor, options=b'abc')
with self.assertRaises(ValueError):
ZstdDecompressor(options={2**31 : 100})
ZstdDecompressor(options={C_INT_MAX: 100})
with self.assertRaises(ValueError):
ZstdDecompressor(options={C_INT_MIN: 100})
with self.assertRaises(ValueError):
ZstdDecompressor(options={0: C_INT_MAX})
with self.assertRaises(OverflowError):
ZstdDecompressor(options={2**1000: 100})
with self.assertRaises(OverflowError):
ZstdDecompressor(options={-(2**1000): 100})
with self.assertRaises(OverflowError):
ZstdDecompressor(options={0: -(2**1000)})
with self.assertRaises(ZstdError):
ZstdDecompressor(options={DecompressionParameter.window_log_max:100})
with self.assertRaises(ZstdError):
ZstdDecompressor(options={3333 : 100})
with self.assertRaises(ValueError):
ZstdDecompressor(options={DecompressionParameter.window_log_max: 100})
with self.assertRaises(ValueError):
ZstdDecompressor(options={3333: 100})
empty = compress(b'')
lzd = ZstdDecompressor()
@ -402,26 +437,52 @@ class DecompressorTestCase(unittest.TestCase):
d = {DecompressionParameter.window_log_max : 15}
ZstdDecompressor(options=d)
# larger than signed int, ValueError
d1 = d.copy()
d1[DecompressionParameter.window_log_max] = 2**31
self.assertRaises(ValueError, ZstdDecompressor, None, d1)
# larger than signed int
d1[DecompressionParameter.window_log_max] = 2**1000
with self.assertRaises(OverflowError):
ZstdDecompressor(None, d1)
# smaller than signed int
d1[DecompressionParameter.window_log_max] = -(2**1000)
with self.assertRaises(OverflowError):
ZstdDecompressor(None, d1)
d1[DecompressionParameter.window_log_max] = C_INT_MAX
with self.assertRaises(ValueError):
ZstdDecompressor(None, d1)
d1[DecompressionParameter.window_log_max] = C_INT_MIN
with self.assertRaises(ValueError):
ZstdDecompressor(None, d1)
# out of bounds error msg
options = {DecompressionParameter.window_log_max:100}
with self.assertRaisesRegex(ZstdError,
(r'Error when setting zstd decompression parameter "window_log_max", '
r'it should \d+ <= value <= \d+, provided value is 100\. '
r'\((?:32|64)-bit build\)')):
with self.assertRaisesRegex(
ValueError,
"decompression parameter 'window_log_max' received an illegal value 100; "
r'the valid range is \[-?\d+, -?\d+\]',
):
decompress(b'', options=options)
# out of bounds deecompression parameter
options[DecompressionParameter.window_log_max] = C_INT_MAX
with self.assertRaises(ValueError):
decompress(b'', options=options)
options[DecompressionParameter.window_log_max] = C_INT_MIN
with self.assertRaises(ValueError):
decompress(b'', options=options)
options[DecompressionParameter.window_log_max] = 2**1000
with self.assertRaises(OverflowError):
decompress(b'', options=options)
options[DecompressionParameter.window_log_max] = -(2**1000)
with self.assertRaises(OverflowError):
decompress(b'', options=options)
def test_unknown_decompression_parameter(self):
KEY = 100001234
options = {DecompressionParameter.window_log_max: DecompressionParameter.window_log_max.bounds()[1],
KEY: 200000000}
pattern = (r'Invalid zstd decompression parameter.*?'
fr'"unknown parameter \(key {KEY}\)"')
with self.assertRaisesRegex(ZstdError, pattern):
pattern = rf"invalid decompression parameter 'unknown parameter \(key {KEY}\)'"
with self.assertRaisesRegex(ValueError, pattern):
ZstdDecompressor(options=options)
def test_decompress_epilogue_flags(self):
@ -1424,11 +1485,11 @@ class FileTestCase(unittest.TestCase):
ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "rw")
with self.assertRaisesRegex(TypeError,
r"NOT be a CompressionParameter"):
r"not be a CompressionParameter"):
ZstdFile(io.BytesIO(), 'rb',
options={CompressionParameter.compression_level:5})
with self.assertRaisesRegex(TypeError,
r"NOT be a DecompressionParameter"):
r"not be a DecompressionParameter"):
ZstdFile(io.BytesIO(), 'wb',
options={DecompressionParameter.window_log_max:21})
@ -1439,19 +1500,19 @@ class FileTestCase(unittest.TestCase):
with self.assertRaises(TypeError):
ZstdFile(io.BytesIO(), "w", level='asd')
# CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid.
with self.assertRaises(ZstdError):
with self.assertRaises(ValueError):
ZstdFile(io.BytesIO(), "w", options={999:9999})
with self.assertRaises(ZstdError):
with self.assertRaises(ValueError):
ZstdFile(io.BytesIO(), "w", options={CompressionParameter.window_log:99})
with self.assertRaises(TypeError):
ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "r", options=33)
with self.assertRaises(ValueError):
with self.assertRaises(OverflowError):
ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB),
options={DecompressionParameter.window_log_max:2**31})
with self.assertRaises(ZstdError):
with self.assertRaises(ValueError):
ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB),
options={444:333})
@ -1467,7 +1528,7 @@ class FileTestCase(unittest.TestCase):
tmp_f.write(DAT_130K_C)
filename = tmp_f.name
with self.assertRaises(ValueError):
with self.assertRaises(TypeError):
ZstdFile(filename, options={'a':'b'})
# for PyPy

View file

@ -103,16 +103,13 @@ static const ParameterInfo dp_list[] = {
};
void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v)
set_parameter_error(int is_compress, int key_v, int value_v)
{
ParameterInfo const *list;
int list_size;
char const *name;
char *type;
ZSTD_bounds bounds;
int i;
char pos_msg[128];
char pos_msg[64];
if (is_compress) {
list = cp_list;
@ -126,8 +123,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
}
/* Find parameter's name */
name = NULL;
for (i = 0; i < list_size; i++) {
char const *name = NULL;
for (int i = 0; i < list_size; i++) {
if (key_v == (list+i)->parameter) {
name = (list+i)->parameter_name;
break;
@ -149,20 +146,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress,
bounds = ZSTD_dParam_getBounds(key_v);
}
if (ZSTD_isError(bounds.error)) {
PyErr_Format(state->ZstdError,
"Invalid zstd %s parameter \"%s\".",
PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'",
type, name);
return;
}
/* Error message */
PyErr_Format(state->ZstdError,
"Error when setting zstd %s parameter \"%s\", it "
"should %d <= value <= %d, provided value is %d. "
"(%d-bit build)",
type, name,
bounds.lowerBound, bounds.upperBound, value_v,
8*(int)sizeof(Py_ssize_t));
PyErr_Format(PyExc_ValueError,
"%s parameter '%s' received an illegal value %d; "
"the valid range is [%d, %d]",
type, name, value_v, bounds.lowerBound, bounds.upperBound);
}
static inline _zstd_state*

View file

@ -49,7 +49,6 @@ set_zstd_error(const _zstd_state* const state,
const error_type type, size_t zstd_ret);
extern void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v);
set_parameter_error(int is_compress, int key_v, int value_v);
#endif // !ZSTD_MODULE_H

View file

@ -49,98 +49,106 @@ typedef struct {
#include "clinic/compressor.c.h"
static int
_zstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
const char *arg_name, const char* arg_type)
_zstd_set_c_level(ZstdCompressor *self, int level)
{
/* Set integer compression level */
int min_level = ZSTD_minCLevel();
int max_level = ZSTD_maxCLevel();
if (level < min_level || level > max_level) {
PyErr_Format(PyExc_ValueError,
"illegal compression level %d; the valid range is [%d, %d]",
level, min_level, max_level);
return -1;
}
/* Save for generating ZSTD_CDICT */
self->compression_level = level;
/* Set compressionLevel to compression context */
size_t zstd_ret = ZSTD_CCtx_setParameter(
self->cctx, ZSTD_c_compressionLevel, level);
/* Check error */
if (ZSTD_isError(zstd_ret)) {
_zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state == NULL) {
return -1;
}
set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret);
return -1;
}
return 0;
}
static int
_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options)
{
size_t zstd_ret;
_zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state == NULL) {
return -1;
}
/* Integer compression level */
if (PyLong_Check(level_or_options)) {
int level = PyLong_AsInt(level_or_options);
if (level == -1 && PyErr_Occurred()) {
PyErr_Format(PyExc_ValueError,
"Compression level should be an int value between "
"%d and %d.", ZSTD_minCLevel(), ZSTD_maxCLevel());
if (!PyDict_Check(options)) {
PyErr_Format(PyExc_TypeError,
"ZstdCompressor() argument 'options' must be dict, not %T",
options);
return -1;
}
Py_ssize_t pos = 0;
PyObject *key, *value;
while (PyDict_Next(options, &pos, &key, &value)) {
/* Check key type */
if (Py_TYPE(key) == mod_state->DParameter_type) {
PyErr_SetString(PyExc_TypeError,
"compression options dictionary key must not be a "
"DecompressionParameter attribute");
return -1;
}
/* Save for generating ZSTD_CDICT */
self->compression_level = level;
Py_INCREF(key);
Py_INCREF(value);
int key_v = PyLong_AsInt(key);
Py_DECREF(key);
if (key_v == -1 && PyErr_Occurred()) {
Py_DECREF(value);
return -1;
}
/* Set compressionLevel to compression context */
zstd_ret = ZSTD_CCtx_setParameter(self->cctx,
ZSTD_c_compressionLevel,
level);
int value_v = PyLong_AsInt(value);
Py_DECREF(value);
if (value_v == -1 && PyErr_Occurred()) {
return -1;
}
if (key_v == ZSTD_c_compressionLevel) {
if (_zstd_set_c_level(self, value_v) < 0) {
return -1;
}
continue;
}
if (key_v == ZSTD_c_nbWorkers) {
/* From the zstd library docs:
1. When nbWorkers >= 1, triggers asynchronous mode when
used with ZSTD_compressStream2().
2, Default value is `0`, aka "single-threaded mode" : no
worker is spawned, compression is performed inside
caller's thread, all invocations are blocking. */
if (value_v != 0) {
self->use_multithread = 1;
}
}
/* Set parameter to compression context */
size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v);
/* Check error */
if (ZSTD_isError(zstd_ret)) {
set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret);
set_parameter_error(1, key_v, value_v);
return -1;
}
return 0;
}
/* Options dict */
if (PyDict_Check(level_or_options)) {
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(level_or_options, &pos, &key, &value)) {
/* Check key type */
if (Py_TYPE(key) == mod_state->DParameter_type) {
PyErr_SetString(PyExc_TypeError,
"Key of compression options dict should "
"NOT be a DecompressionParameter attribute.");
return -1;
}
int key_v = PyLong_AsInt(key);
if (key_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Key of options dict should be either a "
"CompressionParameter attribute or an int.");
return -1;
}
int value_v = PyLong_AsInt(value);
if (value_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Value of options dict should be an int.");
return -1;
}
if (key_v == ZSTD_c_compressionLevel) {
/* Save for generating ZSTD_CDICT */
self->compression_level = value_v;
}
else if (key_v == ZSTD_c_nbWorkers) {
/* From the zstd library docs:
1. When nbWorkers >= 1, triggers asynchronous mode when
used with ZSTD_compressStream2().
2, Default value is `0`, aka "single-threaded mode" : no
worker is spawned, compression is performed inside
caller's thread, all invocations are blocking. */
if (value_v != 0) {
self->use_multithread = 1;
}
}
/* Set parameter to compression context */
zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v);
if (ZSTD_isError(zstd_ret)) {
set_parameter_error(mod_state, 1, key_v, value_v);
return -1;
}
}
return 0;
}
PyErr_Format(PyExc_TypeError,
"Invalid type for %s. Expected %s", arg_name, arg_type);
return -1;
return 0;
}
static void
@ -354,20 +362,35 @@ _zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level,
self->last_mode = ZSTD_e_end;
if (level != Py_None && options != Py_None) {
PyErr_SetString(PyExc_RuntimeError,
PyErr_SetString(PyExc_TypeError,
"Only one of level or options should be used.");
goto error;
}
/* Set compressLevel/options to compression context */
/* Set compression level */
if (level != Py_None) {
if (_zstd_set_c_parameters(self, level, "level", "int") < 0) {
if (!PyLong_Check(level)) {
PyErr_SetString(PyExc_TypeError,
"invalid type for level, expected int");
goto error;
}
int level_v = PyLong_AsInt(level);
if (level_v == -1 && PyErr_Occurred()) {
if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
PyErr_Format(PyExc_ValueError,
"illegal compression level; the valid range is [%d, %d]",
ZSTD_minCLevel(), ZSTD_maxCLevel());
}
goto error;
}
if (_zstd_set_c_level(self, level_v) < 0) {
goto error;
}
}
/* Set options dictionary */
if (options != Py_None) {
if (_zstd_set_c_parameters(self, options, "options", "dict") < 0) {
if (_zstd_set_c_parameters(self, options) < 0) {
goto error;
}
}

View file

@ -86,56 +86,52 @@ _get_DDict(ZstdDict *self)
return self->d_dict;
}
/* Set decompression parameters to decompression context */
static int
_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
{
size_t zstd_ret;
PyObject *key, *value;
Py_ssize_t pos;
_zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self));
if (mod_state == NULL) {
return -1;
}
if (!PyDict_Check(options)) {
PyErr_SetString(PyExc_TypeError,
"options argument should be dict object.");
PyErr_Format(PyExc_TypeError,
"ZstdDecompressor() argument 'options' must be dict, not %T",
options);
return -1;
}
pos = 0;
Py_ssize_t pos = 0;
PyObject *key, *value;
while (PyDict_Next(options, &pos, &key, &value)) {
/* Check key type */
if (Py_TYPE(key) == mod_state->CParameter_type) {
PyErr_SetString(PyExc_TypeError,
"Key of decompression options dict should "
"NOT be a CompressionParameter attribute.");
"compression options dictionary key must not be a "
"CompressionParameter attribute");
return -1;
}
/* Both key & value should be 32-bit signed int */
Py_INCREF(key);
Py_INCREF(value);
int key_v = PyLong_AsInt(key);
Py_DECREF(key);
if (key_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Key of options dict should be either a "
"DecompressionParameter attribute or an int.");
return -1;
}
int value_v = PyLong_AsInt(value);
Py_DECREF(value);
if (value_v == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"Value of options dict should be an int.");
return -1;
}
/* Set parameter to compression context */
zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v);
/* Check error */
if (ZSTD_isError(zstd_ret)) {
set_parameter_error(mod_state, 0, key_v, value_v);
set_parameter_error(0, key_v, value_v);
return -1;
}
}
@ -577,7 +573,7 @@ _zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict,
self->dict = zstd_dict;
}
/* Set option to decompression context */
/* Set options dictionary */
if (options != Py_None) {
if (_zstd_set_d_parameters(self, options) < 0) {
goto error;