mirror of
https://github.com/python/cpython.git
synced 2025-12-04 08:34:25 +00:00
Sumprod(): Update citation. Reorder functions. Add final twosum() call. Improve comments. (#101249)
This commit is contained in:
parent
79af40a403
commit
997073c28b
1 changed files with 21 additions and 38 deletions
|
|
@ -2833,34 +2833,20 @@ long_add_would_overflow(long a, long b)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Double and triple length extended precision floating point arithmetic
|
Double and triple length extended precision floating point arithmetic
|
||||||
based on ideas from three sources:
|
based on:
|
||||||
|
|
||||||
Improved Kahan–Babuška algorithm by Arnold Neumaier
|
|
||||||
https://www.mat.univie.ac.at/~neum/scan/01.pdf
|
|
||||||
|
|
||||||
A Floating-Point Technique for Extending the Available Precision
|
A Floating-Point Technique for Extending the Available Precision
|
||||||
by T. J. Dekker
|
by T. J. Dekker
|
||||||
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
|
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
|
||||||
|
|
||||||
Ultimately Fast Accurate Summation by Siegfried M. Rump
|
Accurate Sum and Dot Product
|
||||||
https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf
|
by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
|
||||||
|
https://doi.org/10.1137/030601818
|
||||||
Double length functions:
|
https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
|
||||||
* dl_split() exact split of a C double into two half precision components.
|
|
||||||
* dl_mul() exact multiplication of two C doubles.
|
|
||||||
|
|
||||||
Triple length functions and constant:
|
|
||||||
* tl_zero is a triple length zero for starting or resetting an accumulation.
|
|
||||||
* tl_add() compensated addition of a C double to a triple length number.
|
|
||||||
* tl_fma() performs a triple length fused-multiply-add.
|
|
||||||
* tl_to_d() converts from triple length number back to a C double.
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct{ double hi; double lo; } DoubleLength;
|
typedef struct{ double hi; double lo; } DoubleLength;
|
||||||
typedef struct{ double hi; double lo; double tiny; } TripleLength;
|
|
||||||
|
|
||||||
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
|
|
||||||
|
|
||||||
static inline DoubleLength
|
static inline DoubleLength
|
||||||
twosum(double a, double b)
|
twosum(double a, double b)
|
||||||
|
|
@ -2874,25 +2860,9 @@ twosum(double a, double b)
|
||||||
return (DoubleLength) {s, t};
|
return (DoubleLength) {s, t};
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline TripleLength
|
|
||||||
tl_add(TripleLength total, double x)
|
|
||||||
{
|
|
||||||
/* Input: x total.hi total.lo total.tiny
|
|
||||||
|--- twosum ---|
|
|
||||||
s.hi s.lo
|
|
||||||
|--- twosum ---|
|
|
||||||
t.hi t.lo
|
|
||||||
|--- single sum ---|
|
|
||||||
Output: s.hi t.hi tiny
|
|
||||||
*/
|
|
||||||
DoubleLength s = twosum(x, total.hi);
|
|
||||||
DoubleLength t = twosum(s.lo, total.lo);
|
|
||||||
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline DoubleLength
|
static inline DoubleLength
|
||||||
dl_split(double x) {
|
dl_split(double x) {
|
||||||
double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */
|
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
|
||||||
double hi = t - (t - x);
|
double hi = t - (t - x);
|
||||||
double lo = x - hi;
|
double lo = x - hi;
|
||||||
return (DoubleLength) {hi, lo};
|
return (DoubleLength) {hi, lo};
|
||||||
|
|
@ -2911,6 +2881,18 @@ dl_mul(double x, double y)
|
||||||
return (DoubleLength) {z, zz};
|
return (DoubleLength) {z, zz};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct{ double hi; double lo; double tiny; } TripleLength;
|
||||||
|
|
||||||
|
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
|
||||||
|
|
||||||
|
static inline TripleLength
|
||||||
|
tl_add(TripleLength total, double x)
|
||||||
|
{
|
||||||
|
DoubleLength s = twosum(x, total.hi);
|
||||||
|
DoubleLength t = twosum(s.lo, total.lo);
|
||||||
|
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
|
||||||
|
}
|
||||||
|
|
||||||
static inline TripleLength
|
static inline TripleLength
|
||||||
tl_fma(TripleLength total, double p, double q)
|
tl_fma(TripleLength total, double p, double q)
|
||||||
{
|
{
|
||||||
|
|
@ -2922,7 +2904,8 @@ tl_fma(TripleLength total, double p, double q)
|
||||||
static inline double
|
static inline double
|
||||||
tl_to_d(TripleLength total)
|
tl_to_d(TripleLength total)
|
||||||
{
|
{
|
||||||
return total.tiny + total.lo + total.hi;
|
DoubleLength last = twosum(total.lo, total.hi);
|
||||||
|
return total.tiny + last.lo + last.hi;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
|
@ -3039,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
}
|
}
|
||||||
|
|
||||||
finalize_int_path:
|
finalize_int_path:
|
||||||
// # We're finished, overflowed, or have a non-int
|
// We're finished, overflowed, or have a non-int
|
||||||
int_path_enabled = false;
|
int_path_enabled = false;
|
||||||
if (int_total_in_use) {
|
if (int_total_in_use) {
|
||||||
term_i = PyLong_FromLong(int_total);
|
term_i = PyLong_FromLong(int_total);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue