mirror of
https://github.com/python/cpython.git
synced 2025-07-07 11:25:30 +00:00
gh-134635: add zlib.{adler32,crc32}_combine
to combine checksums (#134650)
This commit is contained in:
parent
8704d6b391
commit
737b4ba020
6 changed files with 356 additions and 1 deletions
|
@ -44,6 +44,20 @@ The available exception and functions in this module are:
|
|||
.. versionchanged:: 3.0
|
||||
The result is always unsigned.
|
||||
|
||||
.. function:: adler32_combine(adler1, adler2, len2, /)
|
||||
|
||||
Combine two Adler-32 checksums into one.
|
||||
|
||||
Given the Adler-32 checksum *adler1* of a sequence ``A`` and the
|
||||
Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,
|
||||
return the Adler-32 checksum of ``A`` and ``B`` concatenated.
|
||||
|
||||
This function is typically useful to combine Adler-32 checksums
|
||||
that were concurrently computed. To compute checksums sequentially, use
|
||||
:func:`adler32` with the running checksum as the *value* argument.
|
||||
|
||||
.. versionadded:: next
|
||||
|
||||
.. function:: compress(data, /, level=-1, wbits=MAX_WBITS)
|
||||
|
||||
Compresses the bytes in *data*, returning a bytes object containing compressed data.
|
||||
|
@ -136,6 +150,20 @@ The available exception and functions in this module are:
|
|||
.. versionchanged:: 3.0
|
||||
The result is always unsigned.
|
||||
|
||||
.. function:: crc32_combine(crc1, crc2, len2, /)
|
||||
|
||||
Combine two CRC-32 checksums into one.
|
||||
|
||||
Given the CRC-32 checksum *crc1* of a sequence ``A`` and the
|
||||
CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,
|
||||
return the CRC-32 checksum of ``A`` and ``B`` concatenated.
|
||||
|
||||
This function is typically useful to combine CRC-32 checksums
|
||||
that were concurrently computed. To compute checksums sequentially, use
|
||||
:func:`crc32` with the running checksum as the *value* argument.
|
||||
|
||||
.. versionadded:: next
|
||||
|
||||
.. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
|
||||
|
||||
Decompresses the bytes in *data*, returning a bytes object containing the
|
||||
|
|
|
@ -97,6 +97,16 @@ ssl
|
|||
(Contributed by Will Childs-Klein in :gh:`133624`.)
|
||||
|
||||
|
||||
zlib
|
||||
----
|
||||
|
||||
* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`.
|
||||
(Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.)
|
||||
|
||||
* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`.
|
||||
(Contributed by Bénédikt Tran in :gh:`134635`.)
|
||||
|
||||
|
||||
.. Add improved modules above alphabetically, not here at the end.
|
||||
|
||||
Optimizations
|
||||
|
|
|
@ -119,6 +119,114 @@ class ChecksumTestCase(unittest.TestCase):
|
|||
self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
|
||||
|
||||
|
||||
class ChecksumCombineMixin:
|
||||
"""Mixin class for testing checksum combination."""
|
||||
|
||||
N = 1000
|
||||
default_iv: int
|
||||
|
||||
def parse_iv(self, iv):
|
||||
"""Parse an IV value.
|
||||
|
||||
- The default IV is returned if *iv* is None.
|
||||
- A random IV is returned if *iv* is -1.
|
||||
- Otherwise, *iv* is returned as is.
|
||||
"""
|
||||
if iv is None:
|
||||
return self.default_iv
|
||||
if iv == -1:
|
||||
return random.randint(1, 0x80000000)
|
||||
return iv
|
||||
|
||||
def checksum(self, data, init=None):
|
||||
"""Compute the checksum of data with a given initial value.
|
||||
|
||||
The *init* value is parsed by ``parse_iv``.
|
||||
"""
|
||||
iv = self.parse_iv(init)
|
||||
return self._checksum(data, iv)
|
||||
|
||||
def _checksum(self, data, init):
|
||||
raise NotImplementedError
|
||||
|
||||
def combine(self, a, b, blen):
|
||||
"""Combine two checksums together."""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_random_data(self, data_len, *, iv=None):
|
||||
"""Get a triplet (data, iv, checksum)."""
|
||||
data = random.randbytes(data_len)
|
||||
init = self.parse_iv(iv)
|
||||
checksum = self.checksum(data, init)
|
||||
return data, init, checksum
|
||||
|
||||
def test_combine_empty(self):
|
||||
for _ in range(self.N):
|
||||
a, iv, checksum = self.get_random_data(32, iv=-1)
|
||||
res = self.combine(iv, self.checksum(a), len(a))
|
||||
self.assertEqual(res, checksum)
|
||||
|
||||
def test_combine_no_iv(self):
|
||||
for _ in range(self.N):
|
||||
a, _, chk_a = self.get_random_data(32)
|
||||
b, _, chk_b = self.get_random_data(64)
|
||||
res = self.combine(chk_a, chk_b, len(b))
|
||||
self.assertEqual(res, self.checksum(a + b))
|
||||
|
||||
def test_combine_no_iv_invalid_length(self):
|
||||
a, _, chk_a = self.get_random_data(32)
|
||||
b, _, chk_b = self.get_random_data(64)
|
||||
checksum = self.checksum(a + b)
|
||||
for invalid_len in [1, len(a), 48, len(b) + 1, 191]:
|
||||
invalid_res = self.combine(chk_a, chk_b, invalid_len)
|
||||
self.assertNotEqual(invalid_res, checksum)
|
||||
|
||||
self.assertRaises(TypeError, self.combine, 0, 0, "len")
|
||||
|
||||
def test_combine_with_iv(self):
|
||||
for _ in range(self.N):
|
||||
a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1)
|
||||
chk_a_no_iv = self.checksum(a)
|
||||
b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1)
|
||||
chk_b_no_iv = self.checksum(b)
|
||||
|
||||
# We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as:
|
||||
#
|
||||
# c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b))
|
||||
# = COMBINE(
|
||||
# COMBINE(CHK(b'', iv_a), CHK(a)),
|
||||
# COMBINE(CHK(b'', iv_b), CHK(b)),
|
||||
# )
|
||||
# = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b)))
|
||||
tmp0 = self.combine(iv_a, chk_a_no_iv, len(a))
|
||||
tmp1 = self.combine(iv_b, chk_b_no_iv, len(b))
|
||||
expected = self.combine(tmp0, tmp1, len(b))
|
||||
checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b))
|
||||
self.assertEqual(checksum, expected)
|
||||
|
||||
|
||||
class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
|
||||
|
||||
default_iv = 0
|
||||
|
||||
def _checksum(self, data, init):
|
||||
return zlib.crc32(data, init)
|
||||
|
||||
def combine(self, a, b, blen):
|
||||
return zlib.crc32_combine(a, b, blen)
|
||||
|
||||
|
||||
class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
|
||||
|
||||
default_iv = 1
|
||||
|
||||
def _checksum(self, data, init):
|
||||
return zlib.adler32(data, init)
|
||||
|
||||
def combine(self, a, b, blen):
|
||||
return zlib.adler32_combine(a, b, blen)
|
||||
|
||||
|
||||
# Issue #10276 - check that inputs >=4 GiB are handled correctly.
|
||||
class ChecksumBigBufferTestCase(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
:mod:`zlib`: Allow combining Adler-32 and CRC-32 checksums via
|
||||
:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by
|
||||
Callum Attryde and Bénédikt Tran.
|
120
Modules/clinic/zlibmodule.c.h
generated
120
Modules/clinic/zlibmodule.c.h
generated
|
@ -1044,6 +1044,65 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(zlib_adler32_combine__doc__,
|
||||
"adler32_combine($module, adler1, adler2, len2, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Combine two Adler-32 checksums into one.\n"
|
||||
"\n"
|
||||
" adler1\n"
|
||||
" Adler-32 checksum for sequence A\n"
|
||||
" adler2\n"
|
||||
" Adler-32 checksum for sequence B\n"
|
||||
" len2\n"
|
||||
" Length of sequence B\n"
|
||||
"\n"
|
||||
"Given the Adler-32 checksum \'adler1\' of a sequence A and the\n"
|
||||
"Adler-32 checksum \'adler2\' of a sequence B of length \'len2\',\n"
|
||||
"return the Adler-32 checksum of A and B concatenated.");
|
||||
|
||||
#define ZLIB_ADLER32_COMBINE_METHODDEF \
|
||||
{"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__},
|
||||
|
||||
static unsigned int
|
||||
zlib_adler32_combine_impl(PyObject *module, unsigned int adler1,
|
||||
unsigned int adler2, PyObject *len2);
|
||||
|
||||
static PyObject *
|
||||
zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
unsigned int adler1;
|
||||
unsigned int adler2;
|
||||
PyObject *len2;
|
||||
unsigned int _return_value;
|
||||
|
||||
if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) {
|
||||
goto exit;
|
||||
}
|
||||
adler1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]);
|
||||
if (adler1 == (unsigned int)-1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
adler2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]);
|
||||
if (adler2 == (unsigned int)-1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
if (!PyLong_Check(args[2])) {
|
||||
_PyArg_BadArgument("adler32_combine", "argument 3", "int", args[2]);
|
||||
goto exit;
|
||||
}
|
||||
len2 = args[2];
|
||||
_return_value = zlib_adler32_combine_impl(module, adler1, adler2, len2);
|
||||
if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(zlib_crc32__doc__,
|
||||
"crc32($module, data, value=0, /)\n"
|
||||
"--\n"
|
||||
|
@ -1098,6 +1157,65 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(zlib_crc32_combine__doc__,
|
||||
"crc32_combine($module, crc1, crc2, len2, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Combine two CRC-32 checksums into one.\n"
|
||||
"\n"
|
||||
" crc1\n"
|
||||
" CRC-32 checksum for sequence A\n"
|
||||
" crc2\n"
|
||||
" CRC-32 checksum for sequence B\n"
|
||||
" len2\n"
|
||||
" Length of sequence B\n"
|
||||
"\n"
|
||||
"Given the CRC-32 checksum \'crc1\' of a sequence A and the\n"
|
||||
"CRC-32 checksum \'crc2\' of a sequence B of length \'len2\',\n"
|
||||
"return the CRC-32 checksum of A and B concatenated.");
|
||||
|
||||
#define ZLIB_CRC32_COMBINE_METHODDEF \
|
||||
{"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__},
|
||||
|
||||
static unsigned int
|
||||
zlib_crc32_combine_impl(PyObject *module, unsigned int crc1,
|
||||
unsigned int crc2, PyObject *len2);
|
||||
|
||||
static PyObject *
|
||||
zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
unsigned int crc1;
|
||||
unsigned int crc2;
|
||||
PyObject *len2;
|
||||
unsigned int _return_value;
|
||||
|
||||
if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) {
|
||||
goto exit;
|
||||
}
|
||||
crc1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]);
|
||||
if (crc1 == (unsigned int)-1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
crc2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]);
|
||||
if (crc2 == (unsigned int)-1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
if (!PyLong_Check(args[2])) {
|
||||
_PyArg_BadArgument("crc32_combine", "argument 3", "int", args[2]);
|
||||
goto exit;
|
||||
}
|
||||
len2 = args[2];
|
||||
_return_value = zlib_crc32_combine_impl(module, crc1, crc2, len2);
|
||||
if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#ifndef ZLIB_COMPRESS_COPY_METHODDEF
|
||||
#define ZLIB_COMPRESS_COPY_METHODDEF
|
||||
#endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
|
||||
|
@ -1121,4 +1239,4 @@ exit:
|
|||
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
|
||||
/*[clinic end generated code: output=33938c7613a8c1c7 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=3f7692eb3b5d5a0c input=a9049054013a1b77]*/
|
||||
|
|
|
@ -17,6 +17,16 @@
|
|||
#error "At least zlib version 1.2.2.1 is required"
|
||||
#endif
|
||||
|
||||
#if (SIZEOF_OFF_T == SIZEOF_SIZE_T)
|
||||
# define convert_to_z_off_t PyLong_AsSsize_t
|
||||
#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG)
|
||||
# define convert_to_z_off_t PyLong_AsLongLong
|
||||
#elif (SIZEOF_OFF_T == SIZEOF_LONG)
|
||||
# define convert_to_z_off_t PyLong_AsLong
|
||||
#else
|
||||
# error off_t does not match either size_t, long, or long long!
|
||||
#endif
|
||||
|
||||
// Blocks output buffer wrappers
|
||||
#include "pycore_blocks_output_buffer.h"
|
||||
|
||||
|
@ -1876,6 +1886,44 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
|||
return PyLong_FromUnsignedLong(value & 0xffffffffU);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
zlib.adler32_combine -> unsigned_int
|
||||
|
||||
adler1: unsigned_int(bitwise=True)
|
||||
Adler-32 checksum for sequence A
|
||||
|
||||
adler2: unsigned_int(bitwise=True)
|
||||
Adler-32 checksum for sequence B
|
||||
|
||||
len2: object(subclass_of='&PyLong_Type')
|
||||
Length of sequence B
|
||||
/
|
||||
|
||||
Combine two Adler-32 checksums into one.
|
||||
|
||||
Given the Adler-32 checksum 'adler1' of a sequence A and the
|
||||
Adler-32 checksum 'adler2' of a sequence B of length 'len2',
|
||||
return the Adler-32 checksum of A and B concatenated.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static unsigned int
|
||||
zlib_adler32_combine_impl(PyObject *module, unsigned int adler1,
|
||||
unsigned int adler2, PyObject *len2)
|
||||
/*[clinic end generated code: output=61842cefb16afb1b input=51bb045c95130c6f]*/
|
||||
{
|
||||
#if defined(Z_WANT64)
|
||||
z_off64_t len = convert_to_z_off_t(len2);
|
||||
#else
|
||||
z_off_t len = convert_to_z_off_t(len2);
|
||||
#endif
|
||||
if (PyErr_Occurred()) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return adler32_combine(adler1, adler2, len);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*[clinic input]
|
||||
zlib.crc32 -> unsigned_int
|
||||
|
||||
|
@ -1923,13 +1971,50 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
|
|||
return value;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
zlib.crc32_combine -> unsigned_int
|
||||
|
||||
crc1: unsigned_int(bitwise=True)
|
||||
CRC-32 checksum for sequence A
|
||||
|
||||
crc2: unsigned_int(bitwise=True)
|
||||
CRC-32 checksum for sequence B
|
||||
|
||||
len2: object(subclass_of='&PyLong_Type')
|
||||
Length of sequence B
|
||||
/
|
||||
|
||||
Combine two CRC-32 checksums into one.
|
||||
|
||||
Given the CRC-32 checksum 'crc1' of a sequence A and the
|
||||
CRC-32 checksum 'crc2' of a sequence B of length 'len2',
|
||||
return the CRC-32 checksum of A and B concatenated.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static unsigned int
|
||||
zlib_crc32_combine_impl(PyObject *module, unsigned int crc1,
|
||||
unsigned int crc2, PyObject *len2)
|
||||
/*[clinic end generated code: output=c4def907c602e6eb input=9c8a065d9040dc66]*/
|
||||
{
|
||||
#if defined(Z_WANT64)
|
||||
z_off64_t len = convert_to_z_off_t(len2);
|
||||
#else
|
||||
z_off_t len = convert_to_z_off_t(len2);
|
||||
#endif
|
||||
if (PyErr_Occurred()) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return crc32_combine(crc1, crc2, len);
|
||||
}
|
||||
|
||||
static PyMethodDef zlib_methods[] =
|
||||
{
|
||||
ZLIB_ADLER32_METHODDEF
|
||||
ZLIB_ADLER32_COMBINE_METHODDEF
|
||||
ZLIB_COMPRESS_METHODDEF
|
||||
ZLIB_COMPRESSOBJ_METHODDEF
|
||||
ZLIB_CRC32_METHODDEF
|
||||
ZLIB_CRC32_COMBINE_METHODDEF
|
||||
ZLIB_DECOMPRESS_METHODDEF
|
||||
ZLIB_DECOMPRESSOBJ_METHODDEF
|
||||
{NULL, NULL}
|
||||
|
@ -1981,14 +2066,17 @@ static PyType_Spec ZlibDecompressor_type_spec = {
|
|||
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
|
||||
.slots = ZlibDecompressor_type_slots,
|
||||
};
|
||||
|
||||
PyDoc_STRVAR(zlib_module_documentation,
|
||||
"The functions in this module allow compression and decompression using the\n"
|
||||
"zlib library, which is based on GNU zip.\n"
|
||||
"\n"
|
||||
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
|
||||
"adler32_combine(adler1, adler2, len2, /) -- Combine two Adler-32 checksums.\n"
|
||||
"compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n"
|
||||
"compressobj([level[, ...]]) -- Return a compressor object.\n"
|
||||
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
|
||||
"crc32_combine(crc1, crc2, len2, /) -- Combine two CRC-32 checksums.\n"
|
||||
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
|
||||
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
|
||||
"\n"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue