mirror of
https://github.com/python/cpython.git
synced 2025-09-19 07:00:59 +00:00
gh-96735: Fix undefined behaviour in struct unpacking functions (#96739)
This PR fixes undefined behaviour in the struct module unpacking support functions `bu_longlong`, `lu_longlong`, `bu_int` and `lu_int`; thanks to @kumaraditya303 for finding these. The fix is to accumulate the bytes in an unsigned integer type instead of a signed integer type, then to convert to the appropriate signed type. In cases where the width matches, that conversion will typically be compiled away to a no-op. (Evidence from Godbolt: https://godbolt.org/z/5zvxodj64 .) To make the conversions efficient, I've specialised the relevant functions for their output size: for `bu_longlong` and `lu_longlong`, this only entails checking that the output size is indeed `8`. But `bu_int` and `lu_int` were used for format sizes `2` and `4` - I've split those into two separate functions each. No tests, because all of the affected cases are already exercised by the test suite.
This commit is contained in:
parent
817fa28f81
commit
f5f047aa62
2 changed files with 73 additions and 28 deletions
|
@ -0,0 +1 @@
|
||||||
|
Fix undefined behaviour in :func:`struct.unpack`.
|
|
@ -806,18 +806,37 @@ static const formatdef native_table[] = {
|
||||||
/* Big-endian routines. *****************************************************/
|
/* Big-endian routines. *****************************************************/
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bu_int(_structmodulestate *state, const char *p, const formatdef *f)
|
bu_short(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
{
|
{
|
||||||
long x = 0;
|
unsigned long x = 0;
|
||||||
Py_ssize_t i = f->size;
|
|
||||||
|
/* This function is only ever used in the case f->size == 2. */
|
||||||
|
assert(f->size == 2);
|
||||||
|
Py_ssize_t i = 2;
|
||||||
const unsigned char *bytes = (const unsigned char *)p;
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
do {
|
do {
|
||||||
x = (x<<8) | *bytes++;
|
x = (x<<8) | *bytes++;
|
||||||
} while (--i > 0);
|
} while (--i > 0);
|
||||||
/* Extend the sign bit. */
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
if (SIZEOF_LONG > f->size)
|
x = (x ^ 0x8000U) - 0x8000U;
|
||||||
x |= -(x & (1L << ((8 * f->size) - 1)));
|
return PyLong_FromLong(x & 0x8000U ? -1 - (long)(~x) : (long)x);
|
||||||
return PyLong_FromLong(x);
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
bu_int(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
|
{
|
||||||
|
unsigned long x = 0;
|
||||||
|
|
||||||
|
/* This function is only ever used in the case f->size == 4. */
|
||||||
|
assert(f->size == 4);
|
||||||
|
Py_ssize_t i = 4;
|
||||||
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
|
do {
|
||||||
|
x = (x<<8) | *bytes++;
|
||||||
|
} while (--i > 0);
|
||||||
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
|
x = (x ^ 0x80000000U) - 0x80000000U;
|
||||||
|
return PyLong_FromLong(x & 0x80000000U ? -1 - (long)(~x) : (long)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -835,16 +854,19 @@ bu_uint(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bu_longlong(_structmodulestate *state, const char *p, const formatdef *f)
|
bu_longlong(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
{
|
{
|
||||||
long long x = 0;
|
unsigned long long x = 0;
|
||||||
Py_ssize_t i = f->size;
|
|
||||||
|
/* This function is only ever used in the case f->size == 8. */
|
||||||
|
assert(f->size == 8);
|
||||||
|
Py_ssize_t i = 8;
|
||||||
const unsigned char *bytes = (const unsigned char *)p;
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
do {
|
do {
|
||||||
x = (x<<8) | *bytes++;
|
x = (x<<8) | *bytes++;
|
||||||
} while (--i > 0);
|
} while (--i > 0);
|
||||||
/* Extend the sign bit. */
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
if (SIZEOF_LONG_LONG > f->size)
|
x = (x ^ 0x8000000000000000U) - 0x8000000000000000U;
|
||||||
x |= -(x & ((long long)1 << ((8 * f->size) - 1)));
|
return PyLong_FromLongLong(
|
||||||
return PyLong_FromLongLong(x);
|
x & 0x8000000000000000U ? -1 - (long long)(~x) : (long long)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1009,7 +1031,7 @@ static formatdef bigendian_table[] = {
|
||||||
{'c', 1, 0, nu_char, np_char},
|
{'c', 1, 0, nu_char, np_char},
|
||||||
{'s', 1, 0, NULL},
|
{'s', 1, 0, NULL},
|
||||||
{'p', 1, 0, NULL},
|
{'p', 1, 0, NULL},
|
||||||
{'h', 2, 0, bu_int, bp_int},
|
{'h', 2, 0, bu_short, bp_int},
|
||||||
{'H', 2, 0, bu_uint, bp_uint},
|
{'H', 2, 0, bu_uint, bp_uint},
|
||||||
{'i', 4, 0, bu_int, bp_int},
|
{'i', 4, 0, bu_int, bp_int},
|
||||||
{'I', 4, 0, bu_uint, bp_uint},
|
{'I', 4, 0, bu_uint, bp_uint},
|
||||||
|
@ -1027,18 +1049,37 @@ static formatdef bigendian_table[] = {
|
||||||
/* Little-endian routines. *****************************************************/
|
/* Little-endian routines. *****************************************************/
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
lu_int(_structmodulestate *state, const char *p, const formatdef *f)
|
lu_short(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
{
|
{
|
||||||
long x = 0;
|
unsigned long x = 0;
|
||||||
Py_ssize_t i = f->size;
|
|
||||||
|
/* This function is only ever used in the case f->size == 2. */
|
||||||
|
assert(f->size == 2);
|
||||||
|
Py_ssize_t i = 2;
|
||||||
const unsigned char *bytes = (const unsigned char *)p;
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
do {
|
do {
|
||||||
x = (x<<8) | bytes[--i];
|
x = (x<<8) | bytes[--i];
|
||||||
} while (i > 0);
|
} while (i > 0);
|
||||||
/* Extend the sign bit. */
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
if (SIZEOF_LONG > f->size)
|
x = (x ^ 0x8000U) - 0x8000U;
|
||||||
x |= -(x & (1L << ((8 * f->size) - 1)));
|
return PyLong_FromLong(x & 0x8000U ? -1 - (long)(~x) : (long)x);
|
||||||
return PyLong_FromLong(x);
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
lu_int(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
|
{
|
||||||
|
unsigned long x = 0;
|
||||||
|
|
||||||
|
/* This function is only ever used in the case f->size == 4. */
|
||||||
|
assert(f->size == 4);
|
||||||
|
Py_ssize_t i = 4;
|
||||||
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
|
do {
|
||||||
|
x = (x<<8) | bytes[--i];
|
||||||
|
} while (i > 0);
|
||||||
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
|
x = (x ^ 0x80000000U) - 0x80000000U;
|
||||||
|
return PyLong_FromLong(x & 0x80000000U ? -1 - (long)(~x) : (long)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1056,16 +1097,19 @@ lu_uint(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
lu_longlong(_structmodulestate *state, const char *p, const formatdef *f)
|
lu_longlong(_structmodulestate *state, const char *p, const formatdef *f)
|
||||||
{
|
{
|
||||||
long long x = 0;
|
unsigned long long x = 0;
|
||||||
Py_ssize_t i = f->size;
|
|
||||||
|
/* This function is only ever used in the case f->size == 8. */
|
||||||
|
assert(f->size == 8);
|
||||||
|
Py_ssize_t i = 8;
|
||||||
const unsigned char *bytes = (const unsigned char *)p;
|
const unsigned char *bytes = (const unsigned char *)p;
|
||||||
do {
|
do {
|
||||||
x = (x<<8) | bytes[--i];
|
x = (x<<8) | bytes[--i];
|
||||||
} while (i > 0);
|
} while (i > 0);
|
||||||
/* Extend the sign bit. */
|
/* Extend sign, avoiding implementation-defined or undefined behaviour. */
|
||||||
if (SIZEOF_LONG_LONG > f->size)
|
x = (x ^ 0x8000000000000000U) - 0x8000000000000000U;
|
||||||
x |= -(x & ((long long)1 << ((8 * f->size) - 1)));
|
return PyLong_FromLongLong(
|
||||||
return PyLong_FromLongLong(x);
|
x & 0x8000000000000000U ? -1 - (long long)(~x) : (long long)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1213,7 +1257,7 @@ static formatdef lilendian_table[] = {
|
||||||
{'c', 1, 0, nu_char, np_char},
|
{'c', 1, 0, nu_char, np_char},
|
||||||
{'s', 1, 0, NULL},
|
{'s', 1, 0, NULL},
|
||||||
{'p', 1, 0, NULL},
|
{'p', 1, 0, NULL},
|
||||||
{'h', 2, 0, lu_int, lp_int},
|
{'h', 2, 0, lu_short, lp_int},
|
||||||
{'H', 2, 0, lu_uint, lp_uint},
|
{'H', 2, 0, lu_uint, lp_uint},
|
||||||
{'i', 4, 0, lu_int, lp_int},
|
{'i', 4, 0, lu_int, lp_int},
|
||||||
{'I', 4, 0, lu_uint, lp_uint},
|
{'I', 4, 0, lu_uint, lp_uint},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue