mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
GH-100485: Add extended accuracy test. Switch to faster fma() based variant. (GH-101383)
This commit is contained in:
parent
db757f0e44
commit
84483aacc0
2 changed files with 100 additions and 36 deletions
|
@ -2832,12 +2832,7 @@ long_add_would_overflow(long a, long b)
|
|||
}
|
||||
|
||||
/*
|
||||
Double and triple length extended precision floating point arithmetic
|
||||
based on:
|
||||
|
||||
A Floating-Point Technique for Extending the Available Precision
|
||||
by T. J. Dekker
|
||||
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
|
||||
Double and triple length extended precision algorithms from:
|
||||
|
||||
Accurate Sum and Dot Product
|
||||
by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
|
||||
|
@ -2848,36 +2843,22 @@ based on:
|
|||
|
||||
typedef struct{ double hi; double lo; } DoubleLength;
|
||||
|
||||
static inline DoubleLength
|
||||
twosum(double a, double b)
|
||||
static DoubleLength
|
||||
dl_sum(double a, double b)
|
||||
{
|
||||
// Rump Algorithm 3.1 Error-free transformation of the sum
|
||||
/* Algorithm 3.1 Error-free transformation of the sum */
|
||||
double x = a + b;
|
||||
double z = x - a;
|
||||
double y = (a - (x - z)) + (b - z);
|
||||
return (DoubleLength) {x, y};
|
||||
}
|
||||
|
||||
static inline DoubleLength
|
||||
dl_split(double x) {
|
||||
// Rump Algorithm 3.2 Error-free splitting of a floating point number
|
||||
// Dekker (5.5) and (5.6).
|
||||
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
|
||||
double hi = t - (t - x);
|
||||
double lo = x - hi;
|
||||
return (DoubleLength) {hi, lo};
|
||||
}
|
||||
|
||||
static inline DoubleLength
|
||||
static DoubleLength
|
||||
dl_mul(double x, double y)
|
||||
{
|
||||
// Dekker (5.12) and mul12()
|
||||
DoubleLength xx = dl_split(x);
|
||||
DoubleLength yy = dl_split(y);
|
||||
double p = xx.hi * yy.hi;
|
||||
double q = xx.hi * yy.lo + xx.lo * yy.hi;
|
||||
double z = p + q;
|
||||
double zz = p - z + q + xx.lo * yy.lo;
|
||||
/* Algorithm 3.5. Error-free transformation of a product */
|
||||
double z = x * y;
|
||||
double zz = fma(x, y, -z);
|
||||
return (DoubleLength) {z, zz};
|
||||
}
|
||||
|
||||
|
@ -2885,21 +2866,21 @@ typedef struct { double hi; double lo; double tiny; } TripleLength;
|
|||
|
||||
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
|
||||
|
||||
static inline TripleLength
|
||||
tl_fma(TripleLength total, double x, double y)
|
||||
static TripleLength
|
||||
tl_fma(double x, double y, TripleLength total)
|
||||
{
|
||||
// Rump Algorithm 5.10 with K=3 and using SumKVert
|
||||
/* Algorithm 5.10 with SumKVert for K=3 */
|
||||
DoubleLength pr = dl_mul(x, y);
|
||||
DoubleLength sm = twosum(total.hi, pr.hi);
|
||||
DoubleLength r1 = twosum(total.lo, pr.lo);
|
||||
DoubleLength r2 = twosum(r1.hi, sm.lo);
|
||||
DoubleLength sm = dl_sum(total.hi, pr.hi);
|
||||
DoubleLength r1 = dl_sum(total.lo, pr.lo);
|
||||
DoubleLength r2 = dl_sum(r1.hi, sm.lo);
|
||||
return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
|
||||
}
|
||||
|
||||
static inline double
|
||||
static double
|
||||
tl_to_d(TripleLength total)
|
||||
{
|
||||
DoubleLength last = twosum(total.lo, total.hi);
|
||||
DoubleLength last = dl_sum(total.lo, total.hi);
|
||||
return total.tiny + last.lo + last.hi;
|
||||
}
|
||||
|
||||
|
@ -3066,7 +3047,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
|
|||
} else {
|
||||
goto finalize_flt_path;
|
||||
}
|
||||
TripleLength new_flt_total = tl_fma(flt_total, flt_p, flt_q);
|
||||
TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
|
||||
if (isfinite(new_flt_total.hi)) {
|
||||
flt_total = new_flt_total;
|
||||
flt_total_in_use = true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue