Simplify and improve accuracy for subnormals in hypot() (GH-102785)

2025-09-21 08:00:37 +00:00 · 2023-03-17 14:06:52 -05:00 · 2023-03-17 14:06:52 -05:00 · 72186aa637
commit 72186aa637
parent 174c4bfd0f
1 changed file with 30 additions and 37 deletions
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@ -2498,7 +2498,7 @@ References:
 static inline double
 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
 {
-    double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
+    double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
    DoubleLength pr, sm;
    int max_e;
    Py_ssize_t i;
@ -2513,7 +2513,16 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
        return max;
    }
    frexp(max, &max_e);
-    if (max_e >= -1023) {
+    if (max_e < -1023) {
        /* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
           So we first perform lossless scaling from subnormals back to normals,
           then recurse back to vector_norm(), and then finally undo the scaling.
        */
        for (i=0 ; i < n ; i++) {
            vec[i] /= DBL_MIN;
        }
        return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
    }
    scale = ldexp(1.0, -max_e);
    assert(max * scale >= 0.5);
    assert(max * scale < 1.0);
@ -2541,22 +2550,6 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
    x = csum - 1.0 + (frac1 + frac2);
    return (h + x / (2.0 * h)) / scale;
 }
    /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
       So instead of multiplying by a scale, we just divide by *max*.
    */
    for (i=0 ; i < n ; i++) {
        x = vec[i];
        assert(Py_IS_FINITE(x) && fabs(x) <= max);
        x /= max;
        x = x*x;
        assert(x <= 1.0);
        assert(fabs(csum) >= fabs(x));
        oldcsum = csum;
        csum += x;
        frac1 += (oldcsum - csum) + x;
    }
    return max * sqrt(csum - 1.0 + frac1);
 }
 #define NUM_STACK_ELEMS 16