diff options
Diffstat (limited to 'libgcc/config/libbid/bid64_round_integral.c')
-rw-r--r-- | libgcc/config/libbid/bid64_round_integral.c | 534 |
1 files changed, 290 insertions, 244 deletions
diff --git a/libgcc/config/libbid/bid64_round_integral.c b/libgcc/config/libbid/bid64_round_integral.c index bbc981e4787..c777ed81195 100644 --- a/libgcc/config/libbid/bid64_round_integral.c +++ b/libgcc/config/libbid/bid64_round_integral.c @@ -34,7 +34,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_exact (UINT64 * pres, +bid64_round_integral_exact (UINT64 * pres, UINT64 * px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { @@ -44,45 +44,53 @@ __bid64_round_integral_exact (UINT64 * pres, #endif #else UINT64 -__bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM +bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1 represents the significand (UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; int q, ind, shift; UINT64 C1; // UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits - UINT128 fstar = {{ 0x0ull, 0x0ull }};; + UINT128 fstar = { {0x0ull, 0x0ull} }; UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -139,25 +147,25 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM BID_RETURN (res); } break; - } // end switch () + } // end switch () // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); @@ -165,22 +173,22 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM switch (rnd_mode) { case ROUNDING_TO_NEAREST: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - C1 = C1 + __bid_midpoint64[ind - 1]; + C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // if floor(C*) is even then C* = floor(C*) - logical right @@ -192,20 +200,20 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // correct by Property 1) // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (0 < f* < 10^(-x)) then the result is a midpoint // since round_to_even, subtract 1 if current result is odd if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0) - && (fstar.w[0] < __bid_ten2mk64[ind - 1])) { + && (fstar.w[0] < ten2mk64[ind - 1])) { res--; } // determine inexactness of the rounding of C* @@ -217,26 +225,25 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM if (fstar.w[0] > 0x8000000000000000ull) { // f* > 1/2 and the result may be exact // fstar.w[0] - 0x8000000000000000ull is f* - 1/2 - if ((fstar.w[0] - 0x8000000000000000ull) > __bid_ten2mk64[ind - 1]) { + if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } - } else { // if 3 <= ind - 1 <= 21 - if (fstar.w[1] > __bid_one_half128[ind - 1] - || (fstar.w[1] == __bid_one_half128[ind - 1] - && fstar.w[0])) { + } else { // if 3 <= ind - 1 <= 21 + if (fstar.w[1] > onehalf128[ind - 1] || + (fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) { // f2* > 1/2 and the result may be exact // Calculate f2* - 1/2 - if (fstar.w[1] > __bid_one_half128[ind - 1] - || fstar.w[0] > __bid_ten2mk64[ind - 1]) { + if (fstar.w[1] > onehalf128[ind - 1] + || fstar.w[0] > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } @@ -244,7 +251,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; @@ -252,22 +259,22 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM } break; case ROUNDING_TIES_AWAY: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - C1 = C1 + __bid_midpoint64[ind - 1]; + C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // C* = floor(C*) - logical right shift; C* has p decimal digits, @@ -277,14 +284,14 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // correct by Property 1) // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // midpoints are already rounded correctly @@ -297,26 +304,25 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM if (fstar.w[0] > 0x8000000000000000ull) { // f* > 1/2 and the result may be exact // fstar.w[0] - 0x8000000000000000ull is f* - 1/2 - if ((fstar.w[0] - 0x8000000000000000ull) > __bid_ten2mk64[ind - 1]) { + if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } - } else { // if 3 <= ind - 1 <= 21 - if (fstar.w[1] > __bid_one_half128[ind - 1] - || (fstar.w[1] == __bid_one_half128[ind - 1] - && fstar.w[0])) { + } else { // if 3 <= ind - 1 <= 21 + if (fstar.w[1] > onehalf128[ind - 1] || + (fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) { // f2* > 1/2 and the result may be exact // Calculate f2* - 1/2 - if (fstar.w[1] > __bid_one_half128[ind - 1] - || fstar.w[0] > __bid_ten2mk64[ind - 1]) { + if (fstar.w[1] > onehalf128[ind - 1] + || fstar.w[0] > ten2mk64[ind - 1]) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } @@ -324,7 +330,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; @@ -332,7 +338,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM } break; case ROUNDING_DOWN: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -340,30 +346,30 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact - if ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind - 1])) { + if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { if (x_sign) { // if negative and not exact, increment magnitude res++; @@ -373,7 +379,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 + } else { // if exp < 0 and q + exp <= 0 // the result is +0 or -1 if (x_sign) { res = 0xb1c0000000000001ull; @@ -385,7 +391,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM } break; case ROUNDING_UP: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -393,30 +399,30 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact - if ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind - 1])) { + if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { if (!x_sign) { // if positive and not exact, increment magnitude res++; @@ -426,7 +432,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 + } else { // if exp < 0 and q + exp <= 0 // the result is -0 or +1 if (x_sign) { res = 0xb1c0000000000000ull; @@ -438,7 +444,7 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM } break; case ROUNDING_TO_ZERO: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -446,43 +452,43 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact - if ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind - 1])) { + if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) { *pfpsf |= INEXACT_EXCEPTION; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } break; - } // end switch () + } // end switch () BID_RETURN (res); } @@ -492,20 +498,20 @@ __bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_nearest_even (UINT64 * pres, +bid64_round_integral_nearest_even (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 -__bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM +bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT64 res = 0x0ull; + UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; @@ -514,29 +520,37 @@ __bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM UINT128 fstar; UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { - // set invalid flag + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN + // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -557,40 +571,40 @@ __bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - C1 = C1 + __bid_midpoint64[ind - 1]; + C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // if floor(C*) is even then C* = floor(C*) - logical right @@ -602,26 +616,26 @@ __bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM // correct by Property 1) // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (0 < f* < 10^(-x)) then the result is a midpoint // since round_to_even, subtract 1 if current result is odd if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0) - && (fstar.w[0] < __bid_ten2mk64[ind - 1])) { + && (fstar.w[0] < ten2mk64[ind - 1])) { res--; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); @@ -634,20 +648,20 @@ __bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_negative (UINT64 * pres, +bid64_round_integral_negative (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 -__bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM +bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT64 res = 0x0ull; + UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; @@ -657,29 +671,37 @@ __bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM UINT128 fstar; UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { - // set invalid flag + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN + // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -704,25 +726,25 @@ __bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -730,38 +752,38 @@ __bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if (x_sign - && ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind - 1]))) { + && ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) { // if negative and not exact, increment magnitude res++; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 + } else { // if exp < 0 and q + exp <= 0 // the result is +0 or -1 if (x_sign) { res = 0xb1c0000000000001ull; @@ -778,20 +800,20 @@ __bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_positive (UINT64 * pres, +bid64_round_integral_positive (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 -__bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM +bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT64 res = 0x0ull; + UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; @@ -801,29 +823,37 @@ __bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM UINT128 fstar; UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { - // set invalid flag + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN + // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -848,25 +878,25 @@ __bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -874,38 +904,38 @@ __bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; fstar.w[1] = 0; fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact if (!x_sign - && ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind - 1]))) { + && ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) { // if positive and not exact, increment magnitude res++; } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 + } else { // if exp < 0 and q + exp <= 0 // the result is -0 or +1 if (x_sign) { res = 0xb1c0000000000000ull; @@ -922,20 +952,20 @@ __bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_zero (UINT64 * pres, +bid64_round_integral_zero (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 -__bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM +bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT64 res = 0x0ull; + UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; @@ -944,29 +974,37 @@ __bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { - // set invalid flag + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN + // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -987,25 +1025,25 @@ __bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 @@ -1013,36 +1051,36 @@ __bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // C* = floor(C*) (logical right shift; C has p decimal digits, // correct by Property 1) // if (0 < f* < 10^(-x)) then the result is exact // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; // redundant fstar.w[1] = 0; // redundant fstar.w[0] = P128.w[0]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); - // redundant fstar.w[1] = P128.w[1] & __bid_maskhigh128[ind - 1]; + // redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1]; // redundant fstar.w[0] = P128.w[0]; } // if (f* > 10^(-x)) then the result is inexact - // if ((fstar.w[1] != 0) || (fstar.w[0] >= __bid_ten2mk64[ind-1])){ + // if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){ // // redundant // } // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); @@ -1055,20 +1093,20 @@ __bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM #if DECIMAL_CALL_BY_REFERENCE void -__bid64_round_integral_nearest_away (UINT64 * pres, +bid64_round_integral_nearest_away (UINT64 * pres, UINT64 * px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { UINT64 x = *px; #else UINT64 -__bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM +bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT64 res = 0x0ull; + UINT64 res = 0xbaddbaddbaddbaddull; UINT64 x_sign; - int exp; // unbiased exponent + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) BID_UI64DOUBLE tmp1; int x_nr_bits; @@ -1076,29 +1114,37 @@ __bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM UINT64 C1; UINT128 P128; - if ((x & MASK_INF) == MASK_INF) { // x is either INF or NAN - res = x; - if ((x & MASK_SNAN) == MASK_SNAN) { - // set invalid flag + x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + + // check for NaNs and infinities + if ((x & MASK_NAN) == MASK_NAN) { // check for NaN + if ((x & 0x0003ffffffffffffull) > 999999999999999ull) + x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits + else + x = x & 0xfe03ffffffffffffull; // clear G6-G12 + if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN + // set invalid flag *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) + // return quiet (SNaN) res = x & 0xfdffffffffffffffull; + } else { // QNaN + res = x; } - // return original input if QNaN or INF, quietize if SNaN + BID_RETURN (res); + } else if ((x & MASK_INF) == MASK_INF) { // check for Infinity + res = x_sign | 0x7800000000000000ull; BID_RETURN (res); } // unpack x - x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) { // if the steering bits are 11 (condition will be 0), then // the exponent is G[0:w+1] exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398; C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2; - if (C1 > 9999999999999999ull) { // non-canonical - exp = 0; + if (C1 > 9999999999999999ull) { // non-canonical C1 = 0; } - } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) + } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS) exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398; C1 = (x & MASK_BINARY_SIG1); } @@ -1119,40 +1165,40 @@ __bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM } // q = nr. of decimal digits in x (1 <= q <= 54) // determine first the nr. of bits in x - if (C1 >= 0x0020000000000000ull) { // x >= 2^53 + if (C1 >= 0x0020000000000000ull) { // x >= 2^53 q = 16; - } else { // if x < 2^53 + } else { // if x < 2^53 tmp1.d = (double) C1; // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - q = __bid_nr_digits[x_nr_bits - 1].digits; + q = nr_digits[x_nr_bits - 1].digits; if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1 >= __bid_nr_digits[x_nr_bits - 1].threshold_lo) + q = nr_digits[x_nr_bits - 1].digits1; + if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo) q++; } } - if (exp >= 0) { // -exp <= 0 + if (exp >= 0) { // -exp <= 0 // the argument is an integer already res = x; BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q // need to shift right -exp digits from the coefficient; the exp will be 0 ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - C1 = C1 + __bid_midpoint64[ind - 1]; + C1 = C1 + midpoint64[ind - 1]; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 16 - // kx = 10^(-x) = __bid_ten2mk64[ind - 1] + // kx = 10^(-x) = ten2mk64[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 64 bits - __mul_64x64_to_128 (P128, C1, __bid_ten2mk64[ind - 1]); + __mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]); // if (0 < f* < 10^(-x)) then the result is a midpoint // C* = floor(C*) - logical right shift; C* has p decimal digits, @@ -1162,17 +1208,17 @@ __bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM // correct by Property 1) // n = C* * 10^(e+x) - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res = P128.w[1]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res = (P128.w[1] >> shift); } // midpoints are already rounded correctly // set exponent to zero as it was negative before. res = x_sign | 0x31c0000000000000ull | res; BID_RETURN (res); - } else { // if exp < 0 and q + exp < 0 + } else { // if exp < 0 and q + exp < 0 // the result is +0 or -0 res = x_sign | 0x31c0000000000000ull; BID_RETURN (res); |