diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-11-06 17:31:45 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-11-06 17:31:45 +0000 |
commit | 5c8ae23aecdb14ee22ba06684c488cfe0306ff0e (patch) | |
tree | daf286cd6c5edb7441d779682e09e8dc511e57c9 /libc/sysdeps/ieee754/ldbl-128 | |
parent | db0fbac046813774566dfc025932d4e8c0a35640 (diff) |
Merge changes between r21352 and r21563 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@21564 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/ieee754/ldbl-128')
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/s_fmal.c | 137 |
1 files changed, 99 insertions, 38 deletions
diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c index 46b3d81ce..c9accad8a 100644 --- a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -22,6 +22,7 @@ #include <fenv.h> #include <ieee754.h> #include <math_private.h> +#include <tininess.h> /* This implementation uses rounding to odd to avoid problems with double rounding. See a paper by Boldo and Melquiond: @@ -55,17 +56,49 @@ __fmal (long double x, long double y, long double z) underflows to 0. */ if (z == 0 && x != 0 && y != 0) return x * y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff - || u.ieee.exponent + v.ieee.exponent - > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent + > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) + return x * y; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16494L : 0x1p-16494L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p114L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 115 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa3 == 0 + && w.ieee.mantissa2 == 0 + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-114L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { @@ -85,8 +118,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -116,15 +158,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p226L; + w.d *= 0x1p228L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -139,6 +181,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -157,9 +203,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; @@ -195,39 +251,44 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 226) - return (a1 + u.d) * 0x1p-226L; - /* If v.d * 0x1p-226L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa + if (v.ieee.exponent > 228) + return (a1 + u.d) * 0x1p-228L; + /* If v.d * 0x1p-228L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa down just by 1 bit, which means v.ieee.mantissa3 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-226L never normalizes by shifting up, + v.d * 0x1p-228L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 226) + if (v.ieee.exponent == 228) { - /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, - v.ieee.mantissa3 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa3 & 3) == 1) + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) { - v.d *= 0x1p-226L; - if (v.ieee.negative) - return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */; + w.d = a1 + u.d; + if (w.ieee.exponent == 229) + return w.d * 0x1p-228L; } - else - return v.d * 0x1p-226L; + /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, + v.ieee.mantissa3 & 1 is the round bit and j is our sticky + bit. */ + w.d = 0.0L; + w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa3 &= ~3U; + v.d *= 0x1p-228L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa3 |= j; - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; } } weak_alias (__fmal, fmal) |