Sumprod(): Update citation. Reorder functions. Add final twosum() call. Improve comments. (#101249)

2025-12-04 08:34:25 +00:00 · 2023-01-22 17:07:52 -06:00 · 2023-01-22 17:07:52 -06:00 · 997073c28b
commit 997073c28b
parent 79af40a403
1 changed files with 21 additions and 38 deletions
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@ -2833,34 +2833,20 @@ long_add_would_overflow(long a, long b)
 /*
 Double and triple length extended precision floating point arithmetic
-based on ideas from three sources:
+based on:
  Improved Kahan–Babuška algorithm by Arnold Neumaier
  https://www.mat.univie.ac.at/~neum/scan/01.pdf
  A Floating-Point Technique for Extending the Available Precision
  by T. J. Dekker
  https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
-  Ultimately Fast Accurate Summation by Siegfried M. Rump
+  Accurate Sum and Dot Product
-  https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf
+  by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
-
+  https://doi.org/10.1137/030601818
-Double length functions:
+  https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
 * dl_split() exact split of a C double into two half precision components.
 * dl_mul() exact multiplication of two C doubles.
 Triple length functions and constant:
 * tl_zero is a triple length zero for starting or resetting an accumulation.
 * tl_add() compensated addition of a C double to a triple length number.
 * tl_fma() performs a triple length fused-multiply-add.
 * tl_to_d() converts from triple length number back to a C double.
 */
 typedef struct{ double hi; double lo; } DoubleLength;
 typedef struct{ double hi; double lo; double tiny; } TripleLength;
 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
 static inline DoubleLength
 twosum(double a, double b)
@ -2874,25 +2860,9 @@ twosum(double a, double b)
    return  (DoubleLength) {s, t};
 }
 static inline TripleLength
 tl_add(TripleLength total, double x)
 {
    /* Input:       x     total.hi   total.lo    total.tiny
                   |--- twosum ---|
                    s.hi      s.lo
                             |--- twosum ---|
                              t.hi      t.lo
                                       |--- single sum ---|
       Output:      s.hi     t.hi       tiny
     */
    DoubleLength s = twosum(x, total.hi);
    DoubleLength t = twosum(s.lo, total.lo);
    return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
 }
 static inline DoubleLength
 dl_split(double x) {
-    double t = x * 134217729.0;  /* Veltkamp constant = float(0x8000001) */
+    double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
    double hi = t - (t - x);
    double lo = x - hi;
    return (DoubleLength) {hi, lo};
@ -2911,6 +2881,18 @@ dl_mul(double x, double y)
    return (DoubleLength) {z, zz};
 }
 typedef struct{ double hi; double lo; double tiny; } TripleLength;
 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
 static inline TripleLength
 tl_add(TripleLength total, double x)
 {
    DoubleLength s = twosum(x, total.hi);
    DoubleLength t = twosum(s.lo, total.lo);
    return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
 }
 static inline TripleLength
 tl_fma(TripleLength total, double p, double q)
 {
@ -2922,7 +2904,8 @@ tl_fma(TripleLength total, double p, double q)
 static inline double
 tl_to_d(TripleLength total)
 {
-    return total.tiny + total.lo + total.hi;
+    DoubleLength last = twosum(total.lo, total.hi);
    return total.tiny + last.lo + last.hi;
 }
 /*[clinic input]
@ -3039,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
            }
          finalize_int_path:
-            //  # We're finished, overflowed, or have a non-int
+            // We're finished, overflowed, or have a non-int
            int_path_enabled = false;
            if (int_total_in_use) {
                term_i = PyLong_FromLong(int_total);