Avoid signed overflow in some xrange calculations, and extend

xrange tests to cover some special cases that caused problems in py3k. This is a partial backport of r76292-76293 (see issue #7298.)
2025-09-27 10:50:04 +00:00 · 2009-11-15 12:31:13 +00:00 · 2009-11-15 12:31:13 +00:00 · 009ae861f2
commit 009ae861f2
parent 20eb4f0782
2 changed files with 112 additions and 34 deletions
--- a/Lib/test/test_xrange.py
+++ b/Lib/test/test_xrange.py
@ -3,12 +3,49 @@
 import test.test_support, unittest
 import sys
 import pickle
 import itertools
 import warnings
 warnings.filterwarnings("ignore", "integer argument expected",
                        DeprecationWarning, "unittest")
 # pure Python implementations (3 args only), for comparison
 def pyrange(start, stop, step):
    if (start - stop) // step < 0:
        # replace stop with next element in the sequence of integers
        # that are congruent to start modulo step.
        stop += (start - stop) % step
        while start != stop:
            yield start
            start += step
 def pyrange_reversed(start, stop, step):
    stop += (start - stop) % step
    return pyrange(stop - step, start - step, -step)
 class XrangeTest(unittest.TestCase):
    def assert_iterators_equal(self, xs, ys, test_id, limit=None):
        # check that an iterator xs matches the expected results ys,
        # up to a given limit.
        if limit is not None:
            xs = itertools.islice(xs, limit)
            ys = itertools.islice(ys, limit)
        sentinel = object()
        pairs = itertools.izip_longest(xs, ys, fillvalue=sentinel)
        for i, (x, y) in enumerate(pairs):
            if x == y:
                continue
            elif x == sentinel:
                self.fail('{}: iterator ended unexpectedly '
                          'at position {}; expected {}'.format(test_id, i, y))
            elif y == sentinel:
                self.fail('{}: unexpected excess element {} at '
                          'position {}'.format(test_id, x, i))
            else:
                self.fail('{}: wrong element at position {};'
                          'expected {}, got {}'.format(test_id, i, y, x))
    def test_xrange(self):
        self.assertEqual(list(xrange(3)), [0, 1, 2])
        self.assertEqual(list(xrange(1, 5)), [1, 2, 3, 4])
@ -67,6 +104,37 @@ class XrangeTest(unittest.TestCase):
                self.assertEquals(list(pickle.loads(pickle.dumps(r, proto))),
                                  list(r))
    def test_range_iterators(self):
        # see issue 7298
        limits = [base + jiggle
                  for M in (2**32, 2**64)
                  for base in (-M, -M//2, 0, M//2, M)
                  for jiggle in (-2, -1, 0, 1, 2)]
        test_ranges = [(start, end, step)
                       for start in limits
                       for end in limits
                       for step in (-2**63, -2**31, -2, -1, 1, 2)]
        for start, end, step in test_ranges:
            try:
                iter1 = xrange(start, end, step)
            except OverflowError:
                pass
            else:
                iter2 = pyrange(start, end, step)
                test_id = "xrange({}, {}, {})".format(start, end, step)
                # check first 100 entries
                self.assert_iterators_equal(iter1, iter2, test_id, limit=100)
            try:
                iter1 = reversed(xrange(start, end, step))
            except OverflowError:
                pass
            else:
                iter2 = pyrange_reversed(start, end, step)
                test_id = "reversed(xrange({}, {}, {}))".format(start, end, step)
                self.assert_iterators_equal(iter1, iter2, test_id, limit=100)
 def test_main():
    test.test_support.run_unittest(XrangeTest)
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@ -9,16 +9,15 @@ typedef struct {
 	long	len;
 } rangeobject;
-/* Return number of items in range/xrange (lo, hi, step).  step > 0
+/* Return number of items in range (lo, hi, step).  step != 0
- * required.  Return a value < 0 if & only if the true value is too
+ * required.  The result always fits in an unsigned long.
 * large to fit in a signed long.
 */
-static long
+static unsigned long
 get_len_of_range(long lo, long hi, long step)
 {
    /* -------------------------------------------------------------
-	If lo >= hi, the range is empty.
+    If step > 0 and lo >= hi, or step < 0 and lo <= hi, the range is empty.
-	Else if n values are in the range, the last one is
+    Else for step > 0, if n values are in the range, the last one is
    lo + (n-1)*step, which must be <= hi-1.  Rearranging,
    n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
    the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
@ -26,16 +25,16 @@ get_len_of_range(long lo, long hi, long step)
    floor.  Letting M be the largest positive long, the worst case
    for the RHS numerator is hi=M, lo=-M-1, and then
    hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
-	precision to compute the RHS exactly.
+    precision to compute the RHS exactly.  The analysis for step < 0
    is similar.
    ---------------------------------------------------------------*/
-	long n = 0;
+    assert(step != 0);
-	if (lo < hi) {
+    if (step > 0 && lo < hi)
-		unsigned long uhi = (unsigned long)hi;
+        return 1UL + (hi - 1UL - lo) / step;
-		unsigned long ulo = (unsigned long)lo;
+    else if (step < 0 && lo > hi)
-		unsigned long diff = uhi - ulo - 1;
+        return 1UL + (lo - 1UL - hi) / (0UL - step);
-		n = (long)(diff / (unsigned long)step + 1);
+    else
-	}
+        return 0UL;
 	return n;
 }
 static PyObject *
@ -43,7 +42,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
 	rangeobject *obj;
 	long ilow = 0, ihigh = 0, istep = 1;
-	long n;
+	unsigned long n;
 	if (!_PyArg_NoKeywords("xrange()", kw))
 		return NULL;
@ -64,11 +63,8 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 		PyErr_SetString(PyExc_ValueError, "xrange() arg 3 must not be zero");
 		return NULL;
 	}
 	if (istep > 0)
 	n = get_len_of_range(ilow, ihigh, istep);
-	else
+	if (n > (unsigned long)LONG_MAX || (long)n > PY_SSIZE_T_MAX) {
 		n = get_len_of_range(ihigh, ilow, -istep);
 	if (n < 0) {
 		PyErr_SetString(PyExc_OverflowError,
 				"xrange() result has too many items");
 		return NULL;
@ -78,7 +74,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 	if (obj == NULL)
 		return NULL;
 	obj->start = ilow;
-	obj->len   = n;
+	obj->len   = (long)n;
 	obj->step  = istep;
 	return (PyObject *) obj;
 }
@ -98,7 +94,9 @@ range_item(rangeobject *r, Py_ssize_t i)
 				"xrange object index out of range");
 		return NULL;
 	}
-	return PyInt_FromSsize_t(r->start + i * r->step);
+	/* do calculation entirely using unsigned longs, to avoid
 	   undefined behaviour due to signed overflow. */
 	return PyInt_FromLong((long)(r->start + (unsigned long)i * r->step));
 }
 static Py_ssize_t
@ -304,9 +302,21 @@ range_reverse(PyObject *seq)
 	len = ((rangeobject *)seq)->len;
 	it->index = 0;
 	it->start = start + (len-1) * step;
 	it->step = -step;
 	it->len = len;
 	/* the casts below guard against signed overflow by turning it
 	   into unsigned overflow instead.  The correctness of this
 	   code still depends on conversion from unsigned long to long
 	   wrapping modulo ULONG_MAX+1, which isn't guaranteed (see
 	   C99 6.3.1.3p3) but seems to hold in practice for all
 	   platforms we're likely to meet.
 	   If step == LONG_MIN then we still end up with LONG_MIN
 	   after negation; but this works out, since we've still got
 	   the correct value modulo ULONG_MAX+1, and the range_item
 	   calculation is also done modulo ULONG_MAX+1.
 	*/
 	it->start = (long)(start + (unsigned long)(len-1) * step);
 	it->step = (long)(-(unsigned long)step);
 	return (PyObject *)it;
 }