From 7d7576f3262fa15c34d5575637bd8d7ff4a83f16 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 6 Jun 2023 14:06:19 -0700 Subject: [PATCH] Avoid underflow in prsqrt. --- Eigen/src/Core/MathFunctionsImpl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 5782db45e..86c06b2b1 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -87,11 +87,11 @@ struct generic_rsqrt_newton_step { Packet inv_sqrt = approx_rsqrt; for (int step = 0; step < Steps; ++step) { // Refine the approximation using one Newton-Raphson step: - // h_n = x * (inv_sqrt * inv_sqrt) - 1 (so that h_n is nearly 0). + // h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0). // inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n - Packet r2 = pmul(inv_sqrt, inv_sqrt); + Packet r2 = pmul(a, inv_sqrt); Packet half_r = pmul(inv_sqrt, cst_minus_half); - Packet h_n = pmadd(a, r2, cst_minus_one); + Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one); inv_sqrt = pmadd(half_r, h_n, inv_sqrt); }