cpython/Modules/_zstd/_zstdmodule.h
Emma Smith 3b4333583f
gh-132983: Introduce _zstd bindings module (GH-133027)
* Add _zstd module for https://peps.python.org/pep-0784/

This commit introduces the `_zstd` module, with bindings to libzstd from
the pyzstd project. It also includes the unix build system configuration.
Windows build system support will be integrated independently as it
depends on integration with cpython-source-deps.

* Add _zstd to modules

* Fix path for compression.zstd module

* Ignore _zstd module like _io

* Expand module state macros to improve code quality

Also removes module state references from the classes in the _zstd
module and instead uses PyType_GetModuleState()

* Remove backticks suggested in review

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

* Use critical sections to lock object state

This should avoid races and deadlocks.

* Remove compress/decompress and mark module as not reliant on the GIL

The `compress`/`decompress` functions will be moved to Python code for simplicity.
C implementations can always be re-added in the future.

Also, mark _zstd as not requiring the GIL.

* Lift critical section to avoid clang warning

* Respond to comments by picnixz

* Call out pyzstd explicitly in license description

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>

* Use a much more robust implementation...

... for `get_zstd_state_from_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Use PyList_GetItemRef for thread safety purposes

* Use a macro for the minimum supported version

* Remove const from primitive types

* Use PyMem_New in another spot

* Simplify error handling in _get_frame_size

* Another simplification of error handling in get_frame_info

* Rename _module_state to mod_state

* Rewrite comment explaining the context of the code

* Add link to pyzstd

* Add TODO about refactoring dict training code

* Use PyModule_AddObjectRef over PyModule_AddObject

PyModule_AddObject is soft-deprecated, so we should use PyModule_AddObjectRef

* Check result of OutputBufferGrow

* Simplify return logic in `add_constant_to_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Ignore return value of _zstd_clear()

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Remove redundant comments

* Remove __reduce__ from ZstdDict

We should instead document that to pickle a dictionary a user should use
the `.dict_content` attribute.

* Use PyUnicode_FromFormat instead of a buffer

* Don't use C constants/types in error messages

* Make error messages easier to understand for Python users

* Lower minimum required version to 1.4.0

* Use casts and make slot function signatures correct

* Be consistent with CPython on const usage

* Make else clauses in line with PEP 7

* Fix over-indented blocks in argument clinic

* Add critical section around ZSTD_DCtx_setParameter

* Add a TODO about refactoring critical sections

* Use Py_UNREACHABLE

* Move bytes operations out of Py_BEGIN_ALLOW_THREADS

* Add TODO about ensuring a lock is held

* Remove asserts that may not be correct

* Add TODO to make ZstdDict and others GC objects

* Make objects GC tracked

* Remove unused include

* Fix some memory issues

* Fix refleaks on module and in ZstdDict

* Update configure to check for ZDICT_finalizeDictionary

* Properly check version in configure

* exit(1) if check fails

* Use AC_RUN_IFELSE

* Use a define() to re-use version check

* Actually properly set _zstd module status based on version

---------

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
2025-05-04 01:29:55 +00:00

204 lines
5 KiB
C

#pragma once
/*
Low level interface to Meta's zstd library for use in the compression.zstd
Python module.
*/
/* Declarations shared between different parts of the _zstd module. */
#include "Python.h"
#include "zstd.h"
#include "zdict.h"
// If you update the minimum version, you should also update the compile
// check in configure.ac.
// 10405 is the numeric form of the v1.4.5 named in the #error below; it is
// compared directly against ZSTD_VERSION_NUMBER, so it uses the same encoding.
#define PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION 10405
// Refuse to build against a libzstd older than the minimum supported version.
#if ZSTD_VERSION_NUMBER < PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION
#error "_zstd module requires zstd v1.4.5+"
#endif
/* Forward declaration of the module state; the full struct definition
   appears further down in this header. */
typedef struct _zstd_state _zstd_state;
/* Forward reference to the module definition, which is defined outside this
   header.  get_zstd_state_from_type() passes its address to
   PyType_GetModuleByDef(). */
extern PyModuleDef _zstdmodule;
/* For clinic type calculations.

   Fetch the _zstd module state starting from one of the module's types.
   The module is looked up with PyType_GetModuleByDef() using _zstdmodule,
   and its state is obtained with PyModule_GetState().

   Returns the state pointer, or NULL when the module lookup fails. */
static inline _zstd_state *
get_zstd_state_from_type(PyTypeObject *type)
{
    PyObject *mod = PyType_GetModuleByDef(type, &_zstdmodule);
    if (mod != NULL) {
        _zstd_state *mod_state = (_zstd_state *)PyModule_GetState(mod);
        /* A module found through _zstdmodule is expected to carry state. */
        assert(mod_state != NULL);
        return mod_state;
    }
    return NULL;
}
/* Type specs for the ZstdDict, ZstdCompressor and ZstdDecompressor classes.
   They are only declared here; the definitions live outside this header.
   NOTE(review): presumably consumed when the module creates its types --
   confirm in the module initialisation code. */
extern PyType_Spec zstddict_type_spec;
extern PyType_Spec zstdcompressor_type_spec;
extern PyType_Spec ZstdDecompressor_type_spec;
/* Per-module state of the _zstd module.  get_zstd_state_from_type() returns
   a pointer to this struct, obtained from PyModule_GetState(). */
struct _zstd_state {
/* NOTE(review): judging by the names, shared empty bytes / read-only
   memoryview objects -- confirm where they are created. */
PyObject *empty_bytes;
PyObject *empty_readonly_memoryview;
/* NOTE(review): presumably cached method-name strings ("read", "readinto",
   "write", "flush") -- confirm in the module initialisation code. */
PyObject *str_read;
PyObject *str_readinto;
PyObject *str_write;
PyObject *str_flush;
/* NOTE(review): presumably the type objects built from the PyType_Spec
   declarations in this header -- confirm. */
PyTypeObject *ZstdDict_type;
PyTypeObject *ZstdCompressor_type;
PyTypeObject *ZstdDecompressor_type;
/* NOTE(review): presumably the exception raised by set_zstd_error(), whose
   comment says it "set[s] ZstdError" -- confirm. */
PyObject *ZstdError;
/* NOTE(review): presumably the types describing compression and
   decompression parameters -- confirm. */
PyTypeObject *CParameter_type;
PyTypeObject *DParameter_type;
};
/* Instance layout of a ZstdDict object. */
typedef struct {
PyObject_HEAD
/* Reusable decompression/compression dictionaries.  They are created once
   and can be shared by multiple threads concurrently, since their usage is
   read-only.
   c_dicts is a dict mapping int(compressionLevel) to
   PyCapsule(ZSTD_CDict*). */
ZSTD_DDict *d_dict;
PyObject *c_dicts;
/* Content of the dictionary, a bytes object. */
PyObject *dict_content;
/* Dictionary ID. */
uint32_t dict_id;
/* Whether __init__ has been called; 0 or 1. */
int inited;
} ZstdDict;
/* Instance layout of a ZstdCompressor object. */
typedef struct {
PyObject_HEAD
/* Compression context. */
ZSTD_CCtx *cctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Last mode, initialized to ZSTD_e_end. */
int last_mode;
/* 1 if nbWorker >= 1, otherwise 0. */
int use_multithread;
/* Compression level. */
int compression_level;
/* Whether __init__ has been called; 0 or 1. */
int inited;
} ZstdCompressor;
/* Instance layout of a ZstdDecompressor object.
   NOTE(review): do not reorder the char fields below; the note on
   _unused_char_for_align implies that code elsewhere relies on them being
   adjacent -- confirm at the reset site. */
typedef struct {
PyObject_HEAD
/* Decompression context. */
ZSTD_DCtx *dctx;
/* ZstdDict object in use. */
PyObject *dict;
/* Unconsumed input data: buffer pointer, buffer size and begin/end
   positions.
   NOTE(review): presumably in_begin..in_end delimits the bytes of
   input_buffer that are still unconsumed -- confirm in the decompressor
   code. */
char *input_buffer;
size_t input_buffer_size;
size_t in_begin, in_end;
/* Unused data. */
PyObject *unused_data;
/* 0 if the decompressor has (or may have) unconsumed input data.
   The value is always 0 or 1. */
char needs_input;
/* For decompress(), 0 or 1.
   1 when both the input and output streams are at a frame edge, which
   means a frame has been completely decoded and fully flushed, or the
   decompressor has just been initialized. */
char at_frame_edge;
/* For ZstdDecompressor, 0 or 1.
   1 means the end of the first frame has been reached. */
char eof;
/* Unused padding byte kept for alignment; per the original note it is
   there so that the three flags above can be reset quickly.
   NOTE(review): presumably the four chars are cleared with one store --
   confirm at the reset site. */
char _unused_char_for_align;
/* Whether __init__ has been called; 0 or 1. */
int inited;
} ZstdDecompressor;
/* Kind of decompressor; passed as the `type` argument of
   decompress_impl(). */
typedef enum {
    /* <D>: the ZstdDecompressor class. */
    TYPE_DECOMPRESSOR = 0,
    /* <E>: the decompress() function. */
    TYPE_ENDLESS_DECOMPRESSOR = 1,
} decompress_type;
/* Identifies the operation that failed; passed as the `type` argument of
   set_zstd_error().  The numeric values are spelled out so that the
   ordering is explicit. */
typedef enum {
    ERR_DECOMPRESS = 0,
    ERR_COMPRESS = 1,
    ERR_SET_PLEDGED_INPUT_SIZE = 2,
    ERR_LOAD_D_DICT = 3,
    ERR_LOAD_C_DICT = 4,
    ERR_GET_C_BOUNDS = 5,
    ERR_GET_D_BOUNDS = 6,
    ERR_SET_C_LEVEL = 7,
    ERR_TRAIN_DICT = 8,
    ERR_FINALIZE_DICT = 9
} error_type;
/* Ways a dictionary can be used.
   NOTE(review): presumably selects how a ZstdDict is loaded by
   _PyZstd_load_c_dict() / _PyZstd_load_d_dict() -- confirm against the
   definitions.  The values are explicit, so keep them stable. */
typedef enum {
    DICT_TYPE_DIGESTED   = 0,
    DICT_TYPE_UNDIGESTED = 1,
    DICT_TYPE_PREFIX     = 2,
} dictionary_type;
/* Return 1 when the whole input has been consumed (in->pos == in->size)
   while the output position has not reached the output size; return 0
   otherwise.
   NOTE(review): judging by the name, this is the loop-exit test for
   multi-threaded "continue" compression -- confirm at the call site. */
static inline int
mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
    /* Input still pending: never break. */
    if (in->pos != in->size) {
        return 0;
    }
    /* Input exhausted: break only if the output buffer is not full. */
    return out->pos != out->size;
}
/* Format error message and set ZstdError.
   `type` names the failed operation (see error_type).
   NOTE(review): zstd_ret is presumably the size_t error code returned by
   the failing libzstd call -- confirm against the definition. */
extern void
set_zstd_error(const _zstd_state* const state,
const error_type type, size_t zstd_ret);
/* NOTE(review): presumably reports an invalid parameter, where is_compress
   selects between compression and decompression parameters and
   key_v/value_v are the offending key and value -- confirm against the
   definition. */
extern void
set_parameter_error(const _zstd_state* const state, int is_compress,
int key_v, int value_v);
/* Shared message text for a repeated __init__ call.
   NOTE(review): because this is `static` in a header, every translation
   unit that includes the header gets its own copy.  Presumably used
   together with the `inited` fields of the object structs -- confirm. */
static const char init_twice_msg[] = "__init__ method is called twice.";
/* Dictionary-loading helpers for compressor and decompressor objects.
   NOTE(review): the int result is presumably 0 on success and negative with
   a Python exception set on failure -- confirm against the definitions. */
extern int
_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict);
extern int
_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict);
/* Parameter-setting helpers for compressor and decompressor objects.
   NOTE(review): arg_name and arg_type are presumably only used to build
   error messages, and the int result presumably follows the same
   0 / negative convention -- confirm against the definitions. */
extern int
_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
const char *arg_name, const char *arg_type);
extern int
_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options);
/* Decompression routine, declared here for use by the module's source files.
   `type` selects the decompressor kind (see decompress_type).
   NOTE(review): presumably returns a new reference to the decompressed
   data, or NULL with an exception set; max_length and initial_size
   presumably bound and pre-size the output -- confirm against the
   definition. */
extern PyObject *
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
Py_ssize_t max_length,
Py_ssize_t initial_size,
decompress_type type);
/* Compression routine, declared here for use by the module's source files.
   NOTE(review): presumably compresses `data` using the given libzstd end
   directive and returns a new reference, or NULL with an exception set --
   confirm against the definition. */
extern PyObject *
compress_impl(ZstdCompressor *self, Py_buffer *data,
ZSTD_EndDirective end_directive);