Sumprod(): Update citation. Reorder functions. Add final twosum() call. Improve comments. (#101249)

This commit is contained in:
Raymond Hettinger 2023-01-22 17:07:52 -06:00 committed by GitHub
parent 79af40a403
commit 997073c28b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2833,34 +2833,20 @@ long_add_would_overflow(long a, long b)
/* /*
Double and triple length extended precision floating point arithmetic Double and triple length extended precision floating point arithmetic
based on ideas from three sources: based on:
Improved KahanBabuška algorithm by Arnold Neumaier
https://www.mat.univie.ac.at/~neum/scan/01.pdf
A Floating-Point Technique for Extending the Available Precision A Floating-Point Technique for Extending the Available Precision
by T. J. Dekker by T. J. Dekker
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
Ultimately Fast Accurate Summation by Siegfried M. Rump Accurate Sum and Dot Product
https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf by Takeshi Ogita, Siegfried M. Rump, and ShinIchi Oishi
https://doi.org/10.1137/030601818
Double length functions: https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
* dl_split() exact split of a C double into two half precision components.
* dl_mul() exact multiplication of two C doubles.
Triple length functions and constant:
* tl_zero is a triple length zero for starting or resetting an accumulation.
* tl_add() compensated addition of a C double to a triple length number.
* tl_fma() performs a triple length fused-multiply-add.
* tl_to_d() converts from triple length number back to a C double.
*/ */
typedef struct{ double hi; double lo; } DoubleLength; typedef struct{ double hi; double lo; } DoubleLength;
typedef struct{ double hi; double lo; double tiny; } TripleLength;
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
static inline DoubleLength static inline DoubleLength
twosum(double a, double b) twosum(double a, double b)
@ -2874,25 +2860,9 @@ twosum(double a, double b)
return (DoubleLength) {s, t}; return (DoubleLength) {s, t};
} }
static inline TripleLength
tl_add(TripleLength total, double x)
{
/* Input: x total.hi total.lo total.tiny
|--- twosum ---|
s.hi s.lo
|--- twosum ---|
t.hi t.lo
|--- single sum ---|
Output: s.hi t.hi tiny
*/
DoubleLength s = twosum(x, total.hi);
DoubleLength t = twosum(s.lo, total.lo);
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
}
static inline DoubleLength static inline DoubleLength
dl_split(double x) { dl_split(double x) {
double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
double hi = t - (t - x); double hi = t - (t - x);
double lo = x - hi; double lo = x - hi;
return (DoubleLength) {hi, lo}; return (DoubleLength) {hi, lo};
@ -2911,6 +2881,18 @@ dl_mul(double x, double y)
return (DoubleLength) {z, zz}; return (DoubleLength) {z, zz};
} }
typedef struct{ double hi; double lo; double tiny; } TripleLength;
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
static inline TripleLength
tl_add(TripleLength total, double x)
{
DoubleLength s = twosum(x, total.hi);
DoubleLength t = twosum(s.lo, total.lo);
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
}
static inline TripleLength static inline TripleLength
tl_fma(TripleLength total, double p, double q) tl_fma(TripleLength total, double p, double q)
{ {
@ -2922,7 +2904,8 @@ tl_fma(TripleLength total, double p, double q)
static inline double static inline double
tl_to_d(TripleLength total) tl_to_d(TripleLength total)
{ {
return total.tiny + total.lo + total.hi; DoubleLength last = twosum(total.lo, total.hi);
return total.tiny + last.lo + last.hi;
} }
/*[clinic input] /*[clinic input]
@ -3039,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
} }
finalize_int_path: finalize_int_path:
// # We're finished, overflowed, or have a non-int // We're finished, overflowed, or have a non-int
int_path_enabled = false; int_path_enabled = false;
if (int_total_in_use) { if (int_total_in_use) {
term_i = PyLong_FromLong(int_total); term_i = PyLong_FromLong(int_total);