mirror of
https://github.com/python/cpython.git
synced 2025-12-10 02:50:09 +00:00
Forward-port of r52136,52138: a review of overflow-detecting code.
* unified the way intobject, longobject and mystrtoul handle
values around -sys.maxint-1.
* in general, trying to entierely avoid overflows in any computation
involving signed ints or longs is extremely involved. Fixed a few
simple cases where a compiler might be too clever (but that's all
guesswork).
* more overflow checks against bad data in marshal.c.
* 2.5 specific: fixed a number of places that were still confusing int
and Py_ssize_t. Some of them could potentially have caused
"real-world" breakage.
* list.pop(x): fixing overflow issues on x was messy. I just reverted
to PyArg_ParseTuple("n"), which does the right thing. (An obscure
test was trying to give a Decimal to list.pop()... doesn't make
sense any more IMHO)
* trying to write a few tests...
This commit is contained in:
parent
0d2f498a4c
commit
7ccbca93a2
19 changed files with 186 additions and 106 deletions
|
|
@ -1652,20 +1652,18 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
|
|||
if (cmp > 0) {
|
||||
switch (operation) {
|
||||
case PY_ITERSEARCH_COUNT:
|
||||
++n;
|
||||
if (n <= 0) {
|
||||
/* XXX(nnorwitz): int means ssize_t */
|
||||
if (n == PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"count exceeds C int size");
|
||||
"count exceeds C integer size");
|
||||
goto Fail;
|
||||
}
|
||||
++n;
|
||||
break;
|
||||
|
||||
case PY_ITERSEARCH_INDEX:
|
||||
if (wrapped) {
|
||||
/* XXX(nnorwitz): int means ssize_t */
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"index exceeds C int size");
|
||||
"index exceeds C integer size");
|
||||
goto Fail;
|
||||
}
|
||||
goto Done;
|
||||
|
|
@ -1680,9 +1678,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
|
|||
}
|
||||
|
||||
if (operation == PY_ITERSEARCH_INDEX) {
|
||||
++n;
|
||||
if (n <= 0)
|
||||
if (n == PY_SSIZE_T_MAX)
|
||||
wrapped = 1;
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1001,6 +1001,7 @@ getline_via_fgets(FILE *fp)
|
|||
size_t nfree; /* # of free buffer slots; pvend-pvfree */
|
||||
size_t total_v_size; /* total # of slots in buffer */
|
||||
size_t increment; /* amount to increment the buffer */
|
||||
size_t prev_v_size;
|
||||
|
||||
/* Optimize for normal case: avoid _PyString_Resize if at all
|
||||
* possible via first reading into stack buffer "buf".
|
||||
|
|
@ -1115,8 +1116,11 @@ getline_via_fgets(FILE *fp)
|
|||
/* expand buffer and try again */
|
||||
assert(*(pvend-1) == '\0');
|
||||
increment = total_v_size >> 2; /* mild exponential growth */
|
||||
prev_v_size = total_v_size;
|
||||
total_v_size += increment;
|
||||
if (total_v_size > PY_SSIZE_T_MAX) {
|
||||
/* check for overflow */
|
||||
if (total_v_size <= prev_v_size ||
|
||||
total_v_size > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"line is longer than a Python string can hold");
|
||||
Py_DECREF(v);
|
||||
|
|
@ -1125,7 +1129,7 @@ getline_via_fgets(FILE *fp)
|
|||
if (_PyString_Resize(&v, (int)total_v_size) < 0)
|
||||
return NULL;
|
||||
/* overwrite the trailing null byte */
|
||||
pvfree = BUF(v) + (total_v_size - increment - 1);
|
||||
pvfree = BUF(v) + (prev_v_size - 1);
|
||||
}
|
||||
if (BUF(v) + total_v_size != p)
|
||||
_PyString_Resize(&v, p - BUF(v));
|
||||
|
|
|
|||
|
|
@ -546,6 +546,17 @@ int_mul(PyObject *v, PyObject *w)
|
|||
}
|
||||
}
|
||||
|
||||
/* Integer overflow checking for unary negation: on a 2's-complement
|
||||
* box, -x overflows iff x is the most negative long. In this case we
|
||||
* get -x == x. However, -x is undefined (by C) if x /is/ the most
|
||||
* negative long (it's a signed overflow case), and some compilers care.
|
||||
* So we cast x to unsigned long first. However, then other compilers
|
||||
* warn about applying unary minus to an unsigned operand. Hence the
|
||||
* weird "0-".
|
||||
*/
|
||||
#define UNARY_NEG_WOULD_OVERFLOW(x) \
|
||||
((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x))
|
||||
|
||||
/* Return type of i_divmod */
|
||||
enum divmod_result {
|
||||
DIVMOD_OK, /* Correct result */
|
||||
|
|
@ -565,7 +576,7 @@ i_divmod(register long x, register long y,
|
|||
return DIVMOD_ERROR;
|
||||
}
|
||||
/* (-sys.maxint-1)/-1 is the only overflow case. */
|
||||
if (y == -1 && x == LONG_MIN)
|
||||
if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x))
|
||||
return DIVMOD_OVERFLOW;
|
||||
xdivy = x / y;
|
||||
xmody = x - xdivy * y;
|
||||
|
|
@ -756,7 +767,8 @@ int_neg(PyIntObject *v)
|
|||
{
|
||||
register long a;
|
||||
a = v->ob_ival;
|
||||
if (a < 0 && (unsigned long)a == 0-(unsigned long)a) {
|
||||
/* check for overflow */
|
||||
if (UNARY_NEG_WOULD_OVERFLOW(a)) {
|
||||
PyObject *o = PyLong_FromLong(a);
|
||||
if (o != NULL) {
|
||||
PyObject *result = PyNumber_Negative(o);
|
||||
|
|
|
|||
|
|
@ -863,17 +863,12 @@ static PyObject *
|
|||
listpop(PyListObject *self, PyObject *args)
|
||||
{
|
||||
Py_ssize_t i = -1;
|
||||
PyObject *v, *arg = NULL;
|
||||
PyObject *v;
|
||||
int status;
|
||||
|
||||
if (!PyArg_UnpackTuple(args, "pop", 0, 1, &arg))
|
||||
if (!PyArg_ParseTuple(args, "|n:pop", &i))
|
||||
return NULL;
|
||||
if (arg != NULL) {
|
||||
if (PyInt_Check(arg))
|
||||
i = PyInt_AS_LONG((PyIntObject*) arg);
|
||||
else if (!PyArg_ParseTuple(args, "|n:pop", &i))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (self->ob_size == 0) {
|
||||
/* Special-case most common failure cause */
|
||||
PyErr_SetString(PyExc_IndexError, "pop from empty list");
|
||||
|
|
|
|||
|
|
@ -193,6 +193,18 @@ PyLong_FromDouble(double dval)
|
|||
return (PyObject *)v;
|
||||
}
|
||||
|
||||
/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define
|
||||
* anything about what happens when a signed integer operation overflows,
|
||||
* and some compilers think they're doing you a favor by being "clever"
|
||||
* then. The bit pattern for the largest postive signed long is
|
||||
* (unsigned long)LONG_MAX, and for the smallest negative signed long
|
||||
* it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN.
|
||||
* However, some other compilers warn about applying unary minus to an
|
||||
* unsigned operand. Hence the weird "0-".
|
||||
*/
|
||||
#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN)
|
||||
#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN)
|
||||
|
||||
/* Get a C long int from a long int object.
|
||||
Returns -1 and sets an error condition if overflow occurs. */
|
||||
|
||||
|
|
@ -225,14 +237,16 @@ PyLong_AsLong(PyObject *vv)
|
|||
if ((x >> SHIFT) != prev)
|
||||
goto overflow;
|
||||
}
|
||||
/* Haven't lost any bits, but if the sign bit is set we're in
|
||||
* trouble *unless* this is the min negative number. So,
|
||||
* trouble iff sign bit set && (positive || some bit set other
|
||||
* than the sign bit).
|
||||
*/
|
||||
if ((long)x < 0 && (sign > 0 || (x << 1) != 0))
|
||||
goto overflow;
|
||||
return (long)x * sign;
|
||||
/* Haven't lost any bits, but casting to long requires extra care
|
||||
* (see comment above).
|
||||
*/
|
||||
if (x <= (unsigned long)LONG_MAX) {
|
||||
return (long)x * sign;
|
||||
}
|
||||
else if (sign < 0 && x == PY_ABS_LONG_MIN) {
|
||||
return LONG_MIN;
|
||||
}
|
||||
/* else overflow */
|
||||
|
||||
overflow:
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
|
|
@ -268,14 +282,16 @@ _PyLong_AsSsize_t(PyObject *vv) {
|
|||
if ((x >> SHIFT) != prev)
|
||||
goto overflow;
|
||||
}
|
||||
/* Haven't lost any bits, but if the sign bit is set we're in
|
||||
* trouble *unless* this is the min negative number. So,
|
||||
* trouble iff sign bit set && (positive || some bit set other
|
||||
* than the sign bit).
|
||||
/* Haven't lost any bits, but casting to a signed type requires
|
||||
* extra care (see comment above).
|
||||
*/
|
||||
if ((Py_ssize_t)x < 0 && (sign > 0 || (x << 1) != 0))
|
||||
goto overflow;
|
||||
return (Py_ssize_t)x * sign;
|
||||
if (x <= (size_t)PY_SSIZE_T_MAX) {
|
||||
return (Py_ssize_t)x * sign;
|
||||
}
|
||||
else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) {
|
||||
return PY_SSIZE_T_MIN;
|
||||
}
|
||||
/* else overflow */
|
||||
|
||||
overflow:
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
|
|
@ -1167,7 +1183,7 @@ long_format(PyObject *aa, int base, int addL)
|
|||
{
|
||||
register PyLongObject *a = (PyLongObject *)aa;
|
||||
PyStringObject *str;
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t i, j, sz;
|
||||
Py_ssize_t size_a;
|
||||
char *p;
|
||||
int bits;
|
||||
|
|
@ -1187,11 +1203,18 @@ long_format(PyObject *aa, int base, int addL)
|
|||
++bits;
|
||||
i >>= 1;
|
||||
}
|
||||
i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits;
|
||||
str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i);
|
||||
i = 5 + (addL ? 1 : 0);
|
||||
j = size_a*SHIFT + bits-1;
|
||||
sz = i + j / bits;
|
||||
if (j / SHIFT < size_a || sz < i) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"long is too large to format");
|
||||
return NULL;
|
||||
}
|
||||
str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz);
|
||||
if (str == NULL)
|
||||
return NULL;
|
||||
p = PyString_AS_STRING(str) + i;
|
||||
p = PyString_AS_STRING(str) + sz;
|
||||
*p = '\0';
|
||||
if (addL)
|
||||
*--p = 'L';
|
||||
|
|
@ -1305,7 +1328,7 @@ long_format(PyObject *aa, int base, int addL)
|
|||
} while ((*q++ = *p++) != '\0');
|
||||
q--;
|
||||
_PyString_Resize((PyObject **)&str,
|
||||
(int) (q - PyString_AS_STRING(str)));
|
||||
(Py_ssize_t) (q - PyString_AS_STRING(str)));
|
||||
}
|
||||
return (PyObject *)str;
|
||||
}
|
||||
|
|
@ -1363,14 +1386,14 @@ long_from_binary_base(char **str, int base)
|
|||
while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
|
||||
++p;
|
||||
*str = p;
|
||||
n = (p - start) * bits_per_char;
|
||||
if (n / bits_per_char != p - start) {
|
||||
/* n <- # of Python digits needed, = ceiling(n/SHIFT). */
|
||||
n = (p - start) * bits_per_char + SHIFT - 1;
|
||||
if (n / bits_per_char < p - start) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"long string too large to convert");
|
||||
return NULL;
|
||||
}
|
||||
/* n <- # of Python digits needed, = ceiling(n/SHIFT). */
|
||||
n = (n + SHIFT - 1) / SHIFT;
|
||||
n = n / SHIFT;
|
||||
z = _PyLong_New(n);
|
||||
if (z == NULL)
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -804,10 +804,22 @@ string_print(PyStringObject *op, FILE *fp, int flags)
|
|||
return ret;
|
||||
}
|
||||
if (flags & Py_PRINT_RAW) {
|
||||
char *data = op->ob_sval;
|
||||
Py_ssize_t size = op->ob_size;
|
||||
while (size > INT_MAX) {
|
||||
/* Very long strings cannot be written atomically.
|
||||
* But don't write exactly INT_MAX bytes at a time
|
||||
* to avoid memory aligment issues.
|
||||
*/
|
||||
const int chunk_size = INT_MAX & ~0x3FFF;
|
||||
fwrite(data, 1, chunk_size, fp);
|
||||
data += chunk_size;
|
||||
size -= chunk_size;
|
||||
}
|
||||
#ifdef __VMS
|
||||
if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
|
||||
if (size) fwrite(data, (int)size, 1, fp);
|
||||
#else
|
||||
fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
|
||||
fwrite(data, 1, (int)size, fp);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -844,7 +856,7 @@ PyString_Repr(PyObject *obj, int smartquotes)
|
|||
register PyStringObject* op = (PyStringObject*) obj;
|
||||
size_t newsize = 2 + 4 * op->ob_size;
|
||||
PyObject *v;
|
||||
if (newsize > PY_SSIZE_T_MAX) {
|
||||
if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"string is too large to make repr");
|
||||
}
|
||||
|
|
@ -4237,7 +4249,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
|
|||
return NULL;
|
||||
}
|
||||
llen = PyString_Size(result);
|
||||
if (llen > PY_SSIZE_T_MAX) {
|
||||
if (llen > INT_MAX) {
|
||||
PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -4726,9 +4738,10 @@ PyString_Format(PyObject *format, PyObject *args)
|
|||
default:
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"unsupported format character '%c' (0x%x) "
|
||||
"at index %i",
|
||||
"at index %zd",
|
||||
c, c,
|
||||
(int)(fmt - 1 - PyString_AsString(format)));
|
||||
(Py_ssize_t)(fmt - 1 -
|
||||
PyString_AsString(format)));
|
||||
goto error;
|
||||
}
|
||||
if (sign) {
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ type_module(PyTypeObject *type, void *context)
|
|||
s = strrchr(type->tp_name, '.');
|
||||
if (s != NULL)
|
||||
return PyString_FromStringAndSize(
|
||||
type->tp_name, (int)(s - type->tp_name));
|
||||
type->tp_name, (Py_ssize_t)(s - type->tp_name));
|
||||
return PyString_FromString("__builtin__");
|
||||
}
|
||||
}
|
||||
|
|
@ -4116,19 +4116,10 @@ slot_sq_length(PyObject *self)
|
|||
return -1;
|
||||
len = PyInt_AsSsize_t(res);
|
||||
Py_DECREF(res);
|
||||
if (len == -1 && PyErr_Occurred())
|
||||
return -1;
|
||||
#if SIZEOF_SIZE_T < SIZEOF_INT
|
||||
/* Overflow check -- range of PyInt is more than C ssize_t */
|
||||
if (len != (int)len) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"__len__() should return 0 <= outcome < 2**31");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
if (len < 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"__len__() should return >= 0");
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"__len__() should return >= 0");
|
||||
return -1;
|
||||
}
|
||||
return len;
|
||||
|
|
|
|||
|
|
@ -2384,6 +2384,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
|
|||
Py_UNICODE unimax = PyUnicode_GetMax();
|
||||
#endif
|
||||
|
||||
/* XXX overflow detection missing */
|
||||
v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
|
||||
if (v == NULL)
|
||||
goto onError;
|
||||
|
|
@ -3170,6 +3171,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
|||
Py_ssize_t needed = (targetsize - extrachars) + \
|
||||
(targetsize << 2);
|
||||
extrachars += needed;
|
||||
/* XXX overflow detection missing */
|
||||
if (_PyUnicode_Resize(&v,
|
||||
PyUnicode_GET_SIZE(v) + needed) < 0) {
|
||||
Py_DECREF(x);
|
||||
|
|
@ -7762,10 +7764,11 @@ PyObject *PyUnicode_Format(PyObject *format,
|
|||
default:
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"unsupported format character '%c' (0x%x) "
|
||||
"at index %i",
|
||||
"at index %zd",
|
||||
(31<=c && c<=126) ? (char)c : '?',
|
||||
(int)c,
|
||||
(int)(fmt -1 - PyUnicode_AS_UNICODE(uformat)));
|
||||
(Py_ssize_t)(fmt - 1 -
|
||||
PyUnicode_AS_UNICODE(uformat)));
|
||||
goto onError;
|
||||
}
|
||||
if (sign) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue