diff options
Diffstat (limited to 'libgcc/config/libbid/bid128_fma.c')
-rw-r--r-- | libgcc/config/libbid/bid128_fma.c | 2411 |
1 files changed, 1734 insertions, 677 deletions
diff --git a/libgcc/config/libbid/bid128_fma.c b/libgcc/config/libbid/bid128_fma.c index 22e08e2eee0..1d0ffb128ed 100644 --- a/libgcc/config/libbid/bid128_fma.c +++ b/libgcc/config/libbid/bid128_fma.c @@ -32,24 +32,20 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA * ****************************************************************************/ -#include "bid_conf.h" -#include "bid_functions.h" #include "bid_internal.h" static void -rounding_correction ( - unsigned int rnd_mode, - unsigned int is_inexact_lt_midpoint, - unsigned int is_inexact_gt_midpoint, - unsigned int is_midpoint_lt_even, - unsigned int is_midpoint_gt_even, - int unbexp, - UINT128 * ptrres, - _IDEC_flags * ptrfpsf) { - // unbiased true exponent unbexp may be larger than emax +rounding_correction (unsigned int rnd_mode, + unsigned int is_inexact_lt_midpoint, + unsigned int is_inexact_gt_midpoint, + unsigned int is_midpoint_lt_even, + unsigned int is_midpoint_gt_even, + int unbexp, + UINT128 * ptrres, _IDEC_flags * ptrfpsf) { + // unbiased true exponent unbexp may be larger than emax UINT128 res = *ptrres; // expected to have the correct sign and coefficient - // (the exponent field is ignored, as unbexp is used instead) + // (the exponent field is ignored, as unbexp is used instead) UINT64 sign, exp; UINT64 C_hi, C_lo; @@ -68,10 +64,10 @@ rounding_correction ( exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax C_hi = res.w[1] & MASK_COEFF; C_lo = res.w[0]; - if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || + if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && - is_midpoint_gt_even))) || - (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || + is_midpoint_gt_even))) || + (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && is_midpoint_gt_even)))) { // C = C + 1 @@ -87,20 +83,26 @@ rounding_correction ( exp = (UINT64) (unbexp + 6176) << 49; } } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && - ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || + ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { // C = C - 1 C_lo = C_lo - 1; if (C_lo == 0xffffffffffffffffull) C_hi--; // check if we crossed into the lower decade - if (exp > 0 && C_hi == 0x0000314dc6448d93ull && - C_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 - C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 - C_lo = 0x378d8e63ffffffffull; - // exp = exp - EXP_P1; - unbexp = unbexp - 1; - exp = (UINT64) (unbexp + 6176) << 49; + if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { + // C = 10^33 - 1 + if (exp > 0) { + C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 + C_lo = 0x378d8e63ffffffffull; + // exp = exp - EXP_P1; + unbexp = unbexp - 1; + exp = (UINT64) (unbexp + 6176) << 49; + } else { // if exp = 0 + if (is_midpoint_lt_even || is_midpoint_lt_even || + is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact + *ptrfpsf |= UNDERFLOW_EXCEPTION; + } } } else { ; // the result is already correct @@ -192,56 +194,56 @@ sub256 (UINT256 x, UINT256 y, UINT256 * pz) { static int -__bid_nr_digits256 (UINT256 R256) { +nr_digits256 (UINT256 R256) { int ind; // determine the number of decimal digits in R256 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { // between 1 and 19 digits for (ind = 1; ind <= 19; ind++) { - if (R256.w[0] < __bid_ten2k64[ind]) { + if (R256.w[0] < ten2k64[ind]) { break; } } // ind digits } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && - (R256.w[1] < __bid_ten2k128[0].w[1] - || (R256.w[1] == __bid_ten2k128[0].w[1] - && R256.w[0] < __bid_ten2k128[0].w[0]))) { + (R256.w[1] < ten2k128[0].w[1] || + (R256.w[1] == ten2k128[0].w[1] + && R256.w[0] < ten2k128[0].w[0]))) { // 20 digits ind = 20; } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { // between 21 and 38 digits for (ind = 1; ind <= 18; ind++) { - if (R256.w[1] < __bid_ten2k128[ind].w[1] || - (R256.w[1] == __bid_ten2k128[ind].w[1] && - R256.w[0] < __bid_ten2k128[ind].w[0])) { + if (R256.w[1] < ten2k128[ind].w[1] || + (R256.w[1] == ten2k128[ind].w[1] && + R256.w[0] < ten2k128[ind].w[0])) { break; } } // ind + 20 digits ind = ind + 20; - } else if (((R256.w[3] == 0x0 && - (R256.w[2] < __bid_ten2k256[0].w[2])) || - (R256.w[2] == __bid_ten2k256[0].w[2] && - R256.w[1] < __bid_ten2k256[0].w[1]) || - (R256.w[2] == __bid_ten2k256[0].w[2] && - R256.w[1] == __bid_ten2k256[0].w[1] && - R256.w[0] < __bid_ten2k256[0].w[0]))) { + } else if (R256.w[3] == 0x0 && + (R256.w[2] < ten2k256[0].w[2] || + (R256.w[2] == ten2k256[0].w[2] && + R256.w[1] < ten2k256[0].w[1]) || + (R256.w[2] == ten2k256[0].w[2] && + R256.w[1] == ten2k256[0].w[1] && + R256.w[0] < ten2k256[0].w[0]))) { // 39 digits ind = 39; } else { // between 40 and 68 digits for (ind = 1; ind <= 29; ind++) { - if (R256.w[3] < __bid_ten2k256[ind].w[3] || - (R256.w[3] == __bid_ten2k256[ind].w[3] && - R256.w[2] < __bid_ten2k256[ind].w[2]) || - (R256.w[3] == __bid_ten2k256[ind].w[3] && - R256.w[2] == __bid_ten2k256[ind].w[2] && - R256.w[1] < __bid_ten2k256[ind].w[1]) || - (R256.w[3] == __bid_ten2k256[ind].w[3] && - R256.w[2] == __bid_ten2k256[ind].w[2] && - R256.w[1] == __bid_ten2k256[ind].w[1] && - R256.w[0] < __bid_ten2k256[ind].w[0])) { + if (R256.w[3] < ten2k256[ind].w[3] || + (R256.w[3] == ten2k256[ind].w[3] && + R256.w[2] < ten2k256[ind].w[2]) || + (R256.w[3] == ten2k256[ind].w[3] && + R256.w[2] == ten2k256[ind].w[2] && + R256.w[1] < ten2k256[ind].w[1]) || + (R256.w[3] == ten2k256[ind].w[3] && + R256.w[2] == ten2k256[ind].w[2] && + R256.w[1] == ten2k256[ind].w[1] && + R256.w[0] < ten2k256[ind].w[0])) { break; } } @@ -305,10 +307,10 @@ add_and_round (int q3, R256.w[0] = C3.w[0]; } else if (scale <= 19) { // 10^scale fits in 64 bits P128.w[1] = 0; - P128.w[0] = __bid_ten2k64[scale]; + P128.w[0] = ten2k64[scale]; __mul_128x128_to_256 (R256, P128, C3); } else if (scale <= 38) { // 10^scale fits in 128 bits - __mul_128x128_to_256 (R256, __bid_ten2k128[scale - 20], C3); + __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); } else if (scale <= 57) { // 39 <= scale <= 57 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits // (10^67 has 223 bits; 10^69 has 230 bits); @@ -316,9 +318,9 @@ add_and_round (int q3, // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits - __mul_64x128_to_128 (R128, __bid_ten2k64[scale - 38], C3); + __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); // now multiply R128 by 10^38 - __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]); + __mul_128x128_to_256 (R256, R128, ten2k128[18]); } else { // 58 <= scale <= 66 // 10^scale takes between 193 and 220 bits, // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) @@ -328,9 +330,9 @@ add_and_round (int q3, // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 - __mul_64x128_to_128 (R128, C3.w[0], __bid_ten2k128[scale - 58]); + __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); // now calculate 10*38 * 10^(scale-38) * C3 - __mul_128x128_to_256 (R256, R128, __bid_ten2k128[18]); + __mul_128x128_to_256 (R256, R128, ten2k128[18]); } // C3 * 10^scale is now in R256 @@ -348,7 +350,7 @@ add_and_round (int q3, // but may require rounding // compare first R256 = C3 * 10^scale and C4 - if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || + if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 @@ -382,7 +384,7 @@ add_and_round (int q3, } // determine the number of decimal digits in R256 - ind = __bid_nr_digits256 (R256); + ind = nr_digits256 (R256); // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; // round to the destination precision, with unbounded exponent @@ -403,22 +405,22 @@ add_and_round (int q3, if (ind <= 38) { P128.w[1] = R256.w[1]; P128.w[0] = R256.w[0]; - __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round128_19_38 (ind, x0, P128, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else if (ind <= 57) { P192.w[2] = R256.w[2]; P192.w[1] = R256.w[1]; P192.w[0] = R256.w[0]; - __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round192_39_57 (ind, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R192.w[1]; R128.w[0] = R192.w[0]; } else { // if (ind <= 68) - __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round256_58_76 (ind, x0, R256, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); R128.w[1] = R256.w[1]; R128.w[0] = R256.w[0]; } @@ -435,9 +437,9 @@ add_and_round (int q3, P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; P128.w[0] = R128.w[0]; rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, is_midpoint_lt_even, - is_midpoint_gt_even, 0, &P128, ptrfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, is_midpoint_lt_even, + is_midpoint_gt_even, 0, &P128, ptrfpsf); scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 // the number of digits in the significand is p34 = 34 if (e4 + scale < expmin) { @@ -495,10 +497,10 @@ add_and_round (int q3, R128.w[1] = res.w[1] & MASK_COEFF; R128.w[0] = res.w[0]; if (ind <= 19) { - if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp + if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp + } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -506,13 +508,13 @@ add_and_round (int q3, is_inexact_gt_midpoint = 1; } } else { // if (ind <= 38) { - if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || - (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && - R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp + if (R128.w[1] < midpoint128[ind - 20].w[1] || + (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && - R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp + } else if (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -535,18 +537,18 @@ add_and_round (int q3, // round the ind-digit result to ind - x0 digits if (ind <= 18) { // 2 <= ind <= 18 - __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res.w[1] = 0x0; res.w[0] = R64; } else if (ind <= 38) { P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; - __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } e4 = e4 + x0; // expmin // we want the exponent to be expmin, so if incr_exp = 1 then @@ -555,7 +557,7 @@ add_and_round (int q3, // 64 x 128 -> 128 P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; - __mul_64x128_to_128 (res, __bid_ten2k64[1], P128); + __mul_64x128_to_128 (res, ten2k64[1], P128); } res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); @@ -581,7 +583,7 @@ add_and_round (int q3, is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -591,14 +593,14 @@ add_and_round (int q3, is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -613,9 +615,9 @@ add_and_round (int q3, // apply correction if not rounding to nearest if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, ptrfpsf); + is_inexact_lt_midpoint, is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, ptrfpsf); } if (is_midpoint_lt_even || is_midpoint_gt_even || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { @@ -635,23 +637,31 @@ add_and_round (int q3, #if DECIMAL_CALL_BY_REFERENCE -void -__bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz - _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM - _EXC_INFO_PARAM) { +static void +bid128_ext_fma (int *ptr_is_midpoint_lt_even, + int *ptr_is_midpoint_gt_even, + int *ptr_is_inexact_lt_midpoint, + int *ptr_is_inexact_gt_midpoint, UINT128 * pres, + UINT128 * px, UINT128 * py, + UINT128 * + pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { UINT128 x = *px, y = *py, z = *pz; #if !DECIMAL_GLOBAL_ROUNDING unsigned int rnd_mode = *prnd_mode; #endif #else -UINT128 -__bid128_fma (UINT128 x, UINT128 y, UINT128 z - _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM - _EXC_INFO_PARAM) { +static UINT128 +bid128_ext_fma (int *ptr_is_midpoint_lt_even, + int *ptr_is_midpoint_gt_even, + int *ptr_is_inexact_lt_midpoint, + int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, + UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM + _EXC_MASKS_PARAM _EXC_INFO_PARAM) { #endif - UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }}; - UINT64 x_sign, y_sign, z_sign, p_sign; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; int true_p_exp; UINT128 C1, C2, C3; @@ -679,10 +689,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z UINT256 R256; // the following are based on the table of special cases for fma; the NaN - // behavior is similar to that of the IPF fma + // behavior is similar to that of the IA-64 Architecture fma // identify cases where at least one operand is NaN + BID_SWAP128 (x); + BID_SWAP128 (y); + BID_SWAP128 (z); if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) // check first for non-canonical NaN payload @@ -709,6 +722,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z *pfpsf |= INVALID_EXCEPTION; } } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) @@ -735,6 +753,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z *pfpsf |= INVALID_EXCEPTION; } } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) @@ -756,6 +779,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] res.w[0] = x.w[0]; } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check @@ -774,8 +802,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z } else { // G0_G1 != 11 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C1.w[1] > 0x0001ed09bead87c0ull || - (C1.w[1] == 0x0001ed09bead87c0ull - && C1.w[0] > 0x378d8e63ffffffffull)) { + (C1.w[1] == 0x0001ed09bead87c0ull && + C1.w[0] > 0x378d8e63ffffffffull)) { // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; @@ -797,8 +825,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z } else { // G0_G1 != 11 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C2.w[1] > 0x0001ed09bead87c0ull || - (C2.w[1] == 0x0001ed09bead87c0ull - && C2.w[0] > 0x378d8e63ffffffffull)) { + (C2.w[1] == 0x0001ed09bead87c0ull && + C2.w[0] > 0x378d8e63ffffffffull)) { // y is non-canonical if coefficient is larger than 10^34 -1 C2.w[1] = 0; C2.w[0] = 0; @@ -820,8 +848,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z } else { // G0_G1 != 11 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits if (C3.w[1] > 0x0001ed09bead87c0ull || - (C3.w[1] == 0x0001ed09bead87c0ull - && C3.w[0] > 0x378d8e63ffffffffull)) { + (C3.w[1] == 0x0001ed09bead87c0ull && + C3.w[0] > 0x378d8e63ffffffffull)) { // z is non-canonical if coefficient is larger than 10^34 -1 C3.w[1] = 0; C3.w[0] = 0; @@ -875,6 +903,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // set invalid flag *pfpsf |= INVALID_EXCEPTION; } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf @@ -887,8 +920,8 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // set invalid flag *pfpsf |= INVALID_EXCEPTION; } else { - res.w[1] = z.w[1]; - res.w[0] = z.w[0]; + res.w[1] = z_sign | MASK_INF; + res.w[0] = 0x0; } } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 // z = 0, f, inf @@ -902,25 +935,31 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z res.w[1] = p_sign | MASK_INF; res.w[0] = 0x0; } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf // x = 0, f and y = 0, f, necessarily res.w[1] = z_sign | MASK_INF; res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; if (true_p_exp < -6176) p_exp = 0; // cannot be less than EXP_MIN - // MACH DEBUG else if (true_p_exp > 6111) - // p_exp = (UINT64) (6111 + 6176) << 49; // cannot be more than EXP_MAX else p_exp = (UINT64) (true_p_exp + 6176) << 49; - if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || - (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && - C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 + if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 // the result is 0 if (p_exp < z_exp) res.w[1] = p_exp; // preferred exponent @@ -940,6 +979,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z res.w[0] = 0x0; } } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // from this point on, we may need to know the number of decimal digits @@ -970,12 +1014,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z x_nr_bits = 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } - q1 = __bid_nr_digits[x_nr_bits - 1].digits; + q1 = nr_digits[x_nr_bits - 1].digits; if (q1 == 0) { - q1 = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi || - (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi && - C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) + q1 = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) q1++; } } @@ -1004,12 +1048,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } - q2 = __bid_nr_digits[y_nr_bits].digits; + q2 = nr_digits[y_nr_bits].digits; if (q2 == 0) { - q2 = __bid_nr_digits[y_nr_bits].digits1; - if (C2.w[1] > __bid_nr_digits[y_nr_bits].threshold_hi || - (C2.w[1] == __bid_nr_digits[y_nr_bits].threshold_hi && - C2.w[0] >= __bid_nr_digits[y_nr_bits].threshold_lo)) + q2 = nr_digits[y_nr_bits].digits1; + if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || + (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && + C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) q2++; } } @@ -1038,19 +1082,19 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); } - q3 = __bid_nr_digits[z_nr_bits].digits; + q3 = nr_digits[z_nr_bits].digits; if (q3 == 0) { - q3 = __bid_nr_digits[z_nr_bits].digits1; - if (C3.w[1] > __bid_nr_digits[z_nr_bits].threshold_hi || - (C3.w[1] == __bid_nr_digits[z_nr_bits].threshold_hi && - C3.w[0] >= __bid_nr_digits[z_nr_bits].threshold_lo)) + q3 = nr_digits[z_nr_bits].digits1; + if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || + (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && + C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) q3++; } } - if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || + if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { - // x = 0 or y = 0 + // x = 0 or y = 0 // z = f, necessarily; for 0 + z return z, with the preferred exponent // the result is z, but need to get the preferred exponent if (z_exp <= p_exp) { // the preferred exponent is z_exp @@ -1068,20 +1112,25 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z res.w[0] = z.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); } // subtract scale from the exponent z_exp = z_exp - ((UINT64) scale << 49); res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else { ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f @@ -1101,7 +1150,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits C4.w[0] = C1.w[0] * C2.w[0]; // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 - if (C4.w[0] < __bid_ten2k64[q1 + q2 - 1]) + if (C4.w[0] < ten2k64[q1 + q2 - 1]) q4 = q1 + q2 - 1; // q4 in [1, 18] else q4 = q1 + q2; // q4 in [2, 19] @@ -1110,7 +1159,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 - if (C4.w[1] == 0 && C4.w[0] < __bid_ten2k64[19]) { // 19 = q1+q2-1 + if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; q4 = 19; // 19 = q1 + q2 - 1 } else { @@ -1130,9 +1179,9 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z __mul_128x64_to_128 (C4, C2.w[0], C1); } // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 - if (C4.w[1] < __bid_ten2k128[q1 + q2 - 21].w[1] || - (C4.w[1] == __bid_ten2k128[q1 + q2 - 21].w[1] && - C4.w[0] < __bid_ten2k128[q1 + q2 - 21].w[0])) { + if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || + (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && + C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { // if (C4.w[1] == 0) // q4 = 20, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; // else @@ -1147,8 +1196,9 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // may replace this by 128x128_to192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 - if (C4.w[2] == 0 && (C4.w[1] < __bid_ten2k128[18].w[1] || - (C4.w[1] == __bid_ten2k128[18].w[1] && C4.w[0] < __bid_ten2k128[18].w[0]))) { + if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || + (C4.w[1] == ten2k128[18].w[1] + && C4.w[0] < ten2k128[18].w[0]))) { // 18 = 38 - 20 = q1+q2-1 - 20 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; q4 = 38; // 38 = q1 + q2 - 1 @@ -1175,11 +1225,11 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 } // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 - if (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] || - (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] && - (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] || - (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] && - C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))) { + if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || + (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && + (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || + (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && + C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { // if (C4.w[2] == 0) // q4 = 39, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; // else @@ -1200,10 +1250,12 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z __mul_128x128_to_256 (C4, C1, C2); } // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 - if (C4.w[3] == 0 && (C4.w[2] < __bid_ten2k256[18].w[2] || - (C4.w[2] == __bid_ten2k256[18].w[2] && (C4.w[1] < __bid_ten2k256[18].w[1] || - (C4.w[1] == __bid_ten2k256[18].w[1] && C4.w[0] < __bid_ten2k256[18].w[0]))))) { - // 18 = 57 - 39 = q1+q2-1 - 39 + if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || + (C4.w[2] == ten2k256[18].w[2] + && (C4.w[1] < ten2k256[18].w[1] + || (C4.w[1] == ten2k256[18].w[1] + && C4.w[0] < ten2k256[18].w[0]))))) { + // 18 = 57 - 39 = q1+q2-1 - 39 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; q4 = 57; // 57 = q1 + q2 - 1 } else { @@ -1221,13 +1273,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 - if (C4.w[3] < __bid_ten2k256[q1 + q2 - 40].w[3] || - (C4.w[3] == __bid_ten2k256[q1 + q2 - 40].w[3] && - (C4.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] || - (C4.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] && - (C4.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] || - (C4.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] && - C4.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))))) { + if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || + (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && + (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || + (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && + (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || + (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && + C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { // if (C4.w[3] == 0) // q4 = 58, necessarily // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; // else @@ -1251,25 +1303,25 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; - __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q4, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } else if (q4 <= 57) { // 35 <= q4 <= 57 P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; - __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); res.w[0] = R192.w[0]; res.w[1] = R192.w[1]; } else { // if (q4 <= 68) - __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); res.w[0] = R256.w[0]; res.w[1] = R256.w[1]; } @@ -1290,15 +1342,15 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // res = (C4 * 10^scale) * 10^expmax if (q4 <= 19) { // C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C4.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C4.w[0], __bid_ten2k64[scale]); + // 64 x 64 C4.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C4.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C4.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); } } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * CC43 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C4); + // 64 x 128 ten2k64[scale] * CC43 + __mul_128x64_to_128 (res, ten2k64[scale], C4); } e4 = e4 - scale; // expmax q4 = q4 + scale; @@ -1320,12 +1372,17 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z } else { res.w[1] = p_sign | res.w[1]; rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); } *pfpsf |= save_fpsf; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // check for underflow @@ -1345,13 +1402,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // the number of decimal digits in res is q4 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 - __bid_round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); if (incr_exp) { // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 - R64 = __bid_ten2k64[q4 - x0]; + R64 = ten2k64[q4 - x0]; } // res.w[1] = 0; (from above) res.w[0] = R64; @@ -1359,19 +1416,19 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // 19 <= q4 <= 38 P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; - __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q4, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); if (incr_exp) { // increase coefficient by a factor of 10; this will be <= 10^33 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 - // res.w[1] = 0; - res.w[0] = __bid_ten2k64[q4 - x0]; + // res.w[1] = 0; + res.w[0] = ten2k64[q4 - x0]; } else { // 20 <= q4 - x0 <= 37 - res.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; - res.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; + res.w[0] = ten2k128[q4 - x0 - 20].w[0]; + res.w[1] = ten2k128[q4 - x0 - 20].w[1]; } } } @@ -1380,10 +1437,10 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin // determine relationship with 1/2 ulp if (q4 <= 19) { - if (res.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp + if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (res.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp + } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -1391,13 +1448,13 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z is_inexact_gt_midpoint = 1; } } else { // if (q4 <= 34) - if (res.w[1] < __bid_midpoint128[q4 - 20].w[1] || - (res.w[1] == __bid_midpoint128[q4 - 20].w[1] && - res.w[0] < __bid_midpoint128[q4 - 20].w[0])) { // < 1/2 ulp + if (res.w[1] < midpoint128[q4 - 20].w[1] || + (res.w[1] == midpoint128[q4 - 20].w[1] && + res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (res.w[1] == __bid_midpoint128[q4 - 20].w[1] && - res.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp + } else if (res.w[1] == midpoint128[q4 - 20].w[1] && + res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -1424,7 +1481,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z } // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && - is_midpoint_lt_even) { // double rounding error upward + is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) @@ -1444,7 +1501,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -1454,14 +1511,14 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -1483,20 +1540,21 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z ; // res and e4 are unchanged } else if (q4 <= 19) { // C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 res.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[scale]); + // 64 x 64 res.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 res.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, res.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 res.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); } } else { // res fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], res); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], res); } // subtract scale from the exponent e4 = e4 - scale; } } + // check for inexact result if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { @@ -1507,29 +1565,44 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); } *pfpsf |= save_fpsf; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } + // no overflow, and no underflow for rounding to nearest + res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; + + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); + // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) + if (e4 == expmin) { + if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || + ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && + res.w[0] < 0x38c15b0a00000000ull)) { + is_tiny = 1; + } + } + } - // no overflow and no underflow if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; - } - res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; - - if (rnd_mode != ROUNDING_TO_NEAREST) { - rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, pfpsf); + if (is_tiny) + *pfpsf |= UNDERFLOW_EXCEPTION; } if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact @@ -1554,21 +1627,26 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z ; // leave res unchanged } else if (q4 <= 19) { // x * y fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); } res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; } } // else leave the result as it is, because p_exp <= z_exp } *pfpsf |= save_fpsf; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // else we have f * f + f @@ -1578,7 +1656,7 @@ __bid128_fma (UINT128 x, UINT128 y, UINT128 z delta_ge_zero: if (delta >= 0) { - if (p34 <= delta - 1 || // Case (1') + if (p34 <= delta - 1 || // Case (1') (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) // check for overflow, which can occur only in Case (1') if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { @@ -1602,27 +1680,32 @@ delta_ge_zero: } else { if (q3 <= 19) { // C3 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], - __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], + ten2k128[scale - 20]); } } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); } // the coefficient in res has q3 + scale = p34 digits } e3 = e3 - scale; res.w[1] = z_sign | res.w[1]; rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e3, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // res = z @@ -1638,15 +1721,15 @@ delta_ge_zero: res.w[0] = C3.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); } // the coefficient in res has q3 + scale digits // subtract scale from the exponent @@ -1667,11 +1750,11 @@ delta_ge_zero: if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { // there is a gap of exactly one digit between the scaled C3 and C4 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case - if ((q3 <= 19 && C3.w[0] != __bid_ten2k64[q3 - 1]) || - (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != __bid_ten2k64[19])) || - (q3 >= 21 && (C3.w[1] != __bid_ten2k128[q3 - 21].w[1] || - C3.w[0] != __bid_ten2k128[q3 - 21].w[0]))) { - // C3 * 10^ scale != 10^(q3-1) + if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || + (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || + (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || + C3.w[0] != ten2k128[q3 - 21].w[0]))) { + // C3 * 10^ scale != 10^(q3-1) // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value @@ -1684,35 +1767,35 @@ delta_ge_zero: // if q4 > 1 then truncate C4 from q4 digits to 1 digit; // x = q4-1, 1 <= x <= 67 and check if this operation is exact if (q4 <= 18) { // 2 <= q4 <= 18 - __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } else if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; - __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); R64 = R128.w[0]; // one decimal digit } else if (q4 <= 57) { P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; - __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); R64 = R192.w[0]; // one decimal digit } else { // if (q4 <= 68) - __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); R64 = R256.w[0]; // one decimal digit } if (incr_exp) { @@ -1725,7 +1808,7 @@ delta_ge_zero: is_midpoint_lt_even = 1; is_midpoint_gt_even = 0; } else if ((e3 == expmin) || - R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { + R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { // result does not change is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -1739,10 +1822,10 @@ delta_ge_zero: // result decremented is 10^(q3+scale) - 1 if ((q3 + scale) <= 19) { res.w[1] = 0; - res.w[0] = __bid_ten2k64[q3 + scale]; + res.w[0] = ten2k64[q3 + scale]; } else { // if ((q3 + scale + 1) <= 35) - res.w[1] = __bid_ten2k128[q3 + scale - 20].w[1]; - res.w[0] = __bid_ten2k128[q3 + scale - 20].w[0]; + res.w[1] = ten2k128[q3 + scale - 20].w[1]; + res.w[0] = ten2k128[q3 + scale - 20].w[0]; } res.w[0] = res.w[0] - 1; // borrow never occurs z_exp = z_exp - EXP_P1; @@ -1756,7 +1839,7 @@ delta_ge_zero: *pfpsf |= UNDERFLOW_EXCEPTION; } } - } // end 10^(q3+scale-1) + } // end 10^(q3+scale-1) // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } else { @@ -1799,22 +1882,26 @@ delta_ge_zero: // endif // endif // endif - if (((e3 == expmin) && ((q3 + scale) < p34)) || - ((e3 == expmin) && ((q3 + scale) == p34) && - ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull) && // 10^33_high - (res.w[0] == 0x38c15b0a00000000ull) && // 10^33_low - (z_sign != p_sign) && - ((!z_sign && rnd_mode != ROUNDING_UP) || + if ((e3 == expmin && (q3 + scale) < p34) || + (e3 == expmin && (q3 + scale) == p34 && + (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high + res.w[0] == 0x38c15b0a00000000ull && // 10^33_low + z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || (z_sign && rnd_mode != ROUNDING_DOWN)))) { *pfpsf |= UNDERFLOW_EXCEPTION; } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e3, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if (p34 == delta) { // Case (1''B) @@ -1829,15 +1916,15 @@ delta_ge_zero: res.w[0] = C3.w[0]; } else if (q3 <= 19) { // z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); } // subtract scale from the exponent z_exp = z_exp - ((UINT64) scale << 49); @@ -1847,55 +1934,55 @@ delta_ge_zero: // determine whether x * y is less than, equal to, or greater than // 1/2 ulp (z) if (q4 <= 19) { - if (C4.w[0] < __bid_midpoint64[q4 - 1]) { // < 1/2 ulp + if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp lt_half_ulp = 1; - } else if (C4.w[0] == __bid_midpoint64[q4 - 1]) { // = 1/2 ulp + } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else if (q4 <= 38) { - if (C4.w[2] == 0 && (C4.w[1] < __bid_midpoint128[q4 - 20].w[1] || - (C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && - C4.w[0] < __bid_midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp + if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || + (C4.w[1] == midpoint128[q4 - 20].w[1] && + C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp lt_half_ulp = 1; - } else if (C4.w[2] == 0 && C4.w[1] == __bid_midpoint128[q4 - 20].w[1] && - C4.w[0] == __bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp + } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && + C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else if (q4 <= 58) { - if (C4.w[3] == 0 && (C4.w[2] < __bid_midpoint192[q4 - 39].w[2] || - (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && - C4.w[1] < __bid_midpoint192[q4 - 39].w[1]) || - (C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && - C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && - C4.w[0] < __bid_midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp + if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || + (C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] < midpoint192[q4 - 39].w[1]) || + (C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] == midpoint192[q4 - 39].w[1] && + C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp lt_half_ulp = 1; - } else if (C4.w[3] == 0 && C4.w[2] == __bid_midpoint192[q4 - 39].w[2] && - C4.w[1] == __bid_midpoint192[q4 - 39].w[1] && - C4.w[0] == __bid_midpoint192[q4 - 39].w[0]) { // = 1/2 ulp + } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] == midpoint192[q4 - 39].w[1] && + C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; } } else { - if (C4.w[3] < __bid_midpoint256[q4 - 59].w[3] || - (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && - C4.w[2] < __bid_midpoint256[q4 - 59].w[2]) || - (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && - C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && - C4.w[1] < __bid_midpoint256[q4 - 59].w[1]) || - (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && - C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && - C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && - C4.w[0] < __bid_midpoint256[q4 - 59].w[0])) { // < 1/2 ulp + if (C4.w[3] < midpoint256[q4 - 59].w[3] || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] < midpoint256[q4 - 59].w[2]) || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] < midpoint256[q4 - 59].w[1]) || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] == midpoint256[q4 - 59].w[1] && + C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp lt_half_ulp = 1; - } else if (C4.w[3] == __bid_midpoint256[q4 - 59].w[3] && - C4.w[2] == __bid_midpoint256[q4 - 59].w[2] && - C4.w[1] == __bid_midpoint256[q4 - 59].w[1] && - C4.w[0] == __bid_midpoint256[q4 - 59].w[0]) { // = 1/2 ulp + } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] == midpoint256[q4 - 59].w[1] && + C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp eq_half_ulp = 1; } else { // > 1/2 ulp gt_half_ulp = 1; @@ -1942,14 +2029,20 @@ delta_ge_zero: res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e3, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); + z_exp = res.w[1] & MASK_EXP; } } else { // if (p_sign != z_sign) // consider two cases, because C3 * 10^scale = 10^33 is a special case @@ -1985,23 +2078,29 @@ delta_ge_zero: *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); } else { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, - is_midpoint_gt_even, e3, &res, - pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, - is_midpoint_gt_even, e3, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, pfpsf); } + z_exp = res.w[1] & MASK_EXP; } else { // if C3 * 10^scale = 10^33 e3 = (z_exp >> 49) - 6176; if (e3 > expmin) { @@ -2019,118 +2118,122 @@ delta_ge_zero: // if q4 > 1 then truncate C4 from q4 digits to 1 digit; // x = q4-1, 1 <= x <= 67 and check if this operation is exact if (q4 <= 18) { // 2 <= q4 <= 18 - __bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } else if (q4 <= 38) { - P128.w[1] = C4.w[1]; - P128.w[0] = C4.w[0]; - __bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - R64 = R128.w[0]; // one decimal digit + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R128.w[0]; // one decimal digit } else if (q4 <= 57) { - P192.w[2] = C4.w[2]; - P192.w[1] = C4.w[1]; - P192.w[0] = C4.w[0]; - __bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - R64 = R192.w[0]; // one decimal digit + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R192.w[0]; // one decimal digit } else { // if (q4 <= 68) - __bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, - &is_midpoint_lt_even, - &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - R64 = R256.w[0]; // one decimal digit + round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R256.w[0]; // one decimal digit } if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { - // the result is exact: 10^34 - R64 - // incr_exp = 0 with certainty - z_exp = z_exp - EXP_P1; - e3 = e3 - 1; - res.w[1] = - z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; - res.w[0] = 0x378d8e6400000000ull - R64; + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // the result is exact: 10^34 - R64 + // incr_exp = 0 with certainty + z_exp = z_exp - EXP_P1; + e3 = e3 - 1; + res.w[1] = + z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; + res.w[0] = 0x378d8e6400000000ull - R64; } else { - // We want R64 to be the top digit of C4, but we actually - // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, - // because the top digit is (C4 * 10^(-q4+1))RZ - // however, if incr_exp = 1 then R64 = 10 with certainty - if (incr_exp) { - R64 = 10; - } - // the result is inexact as C4 has more than 1 significant digit - // and C3 * 10^scale = 10^33 - // example of case that is treated here: - // 100...0 * 10^e3 - 0.41 * 10^e3 = - // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) - // note that (e3 > expmin} - // in order to round, subtract R64 from 10^34 and then compare - // C4 - R64 * 10^(q4-1) with 1/2 ulp - // calculate 10^34 - R64 - res.w[1] = 0x0001ed09bead87c0ull; - res.w[0] = 0x378d8e6400000000ull - R64; - z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand - // calculate C4 - R64 * 10^(q4-1); this is a rare case and - // R64 is small, 1 <= R64 <= 9 - e3 = e3 - 1; - if (is_inexact_lt_midpoint) { - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 1; - } else if (is_inexact_gt_midpoint) { - is_inexact_gt_midpoint = 0; - is_inexact_lt_midpoint = 1; - } else if (is_midpoint_lt_even) { - is_midpoint_lt_even = 0; - is_midpoint_gt_even = 1; - } else if (is_midpoint_gt_even) { - is_midpoint_gt_even = 0; - is_midpoint_lt_even = 1; - } else { - ; - } - // the result is always inexact, and never tiny - // check for overflow for RN - if (e3 > expmax) { - if (rnd_mode == ROUNDING_TO_NEAREST) { - res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf - res.w[0] = 0x0000000000000000ull; - *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); - } else { - rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, - is_midpoint_gt_even, e3, &res, - pfpsf); - } - BID_RETURN (res) - } - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - res.w[1] = - z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; - if (rnd_mode != ROUNDING_TO_NEAREST) { - rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, - is_midpoint_gt_even, e3, &res, - pfpsf); - } - z_exp = res.w[1] & MASK_EXP; - e3 = (z_exp >> 49) - 6176; - } // end result is inexact - } // end q4 > 1 + // We want R64 to be the top digit of C4, but we actually + // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, + // because the top digit is (C4 * 10^(-q4+1))RZ + // however, if incr_exp = 1 then R64 = 10 with certainty + if (incr_exp) { + R64 = 10; + } + // the result is inexact as C4 has more than 1 significant digit + // and C3 * 10^scale = 10^33 + // example of case that is treated here: + // 100...0 * 10^e3 - 0.41 * 10^e3 = + // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) + // note that (e3 > expmin} + // in order to round, subtract R64 from 10^34 and then compare + // C4 - R64 * 10^(q4-1) with 1/2 ulp + // calculate 10^34 - R64 + res.w[1] = 0x0001ed09bead87c0ull; + res.w[0] = 0x378d8e6400000000ull - R64; + z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand + // calculate C4 - R64 * 10^(q4-1); this is a rare case and + // R64 is small, 1 <= R64 <= 9 + e3 = e3 - 1; + if (is_inexact_lt_midpoint) { + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + } else if (is_inexact_gt_midpoint) { + is_inexact_gt_midpoint = 0; + is_inexact_lt_midpoint = 1; + } else if (is_midpoint_lt_even) { + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 1; + } else if (is_midpoint_gt_even) { + is_midpoint_gt_even = 0; + is_midpoint_lt_even = 1; + } else { + ; + } + // the result is always inexact, and never tiny + // check for overflow for RN + if (e3 > expmax) { + if (rnd_mode == ROUNDING_TO_NEAREST) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + } else { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + res.w[1] = + z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + } + z_exp = res.w[1] & MASK_EXP; + } // end result is inexact + } // end q4 > 1 } else { // if (e3 = emin) // if e3 = expmin the result is also tiny (the condition for // tininess is C4 > 050...0 [q4 digits] which is met because @@ -2161,27 +2264,33 @@ delta_ge_zero: if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, - is_midpoint_gt_even, e3, &res, - pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + z_exp = res.w[1] & MASK_EXP; } - } // end e3 = emin + } // end e3 = emin // set the inexact flag (if the result was not exact) if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) *pfpsf |= INEXACT_EXCEPTION; - } // end 10^33 - } // end if (p_sign != z_sign) + } // end 10^33 + } // end if (p_sign != z_sign) res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) (q3 <= delta && delta + q4 <= p34) || // Case (3) (delta < q3 && p34 < delta + q4) || // Case (4) (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) - (delta + q4 < q3)) &&// Case (6) + (delta + q4 < q3)) && // Case (6) !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) // the result has the sign of z @@ -2200,15 +2309,15 @@ delta_ge_zero: scale = q3 - delta - q4; // 1 <= scale <= 33 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C4.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (P128, C4.w[0], __bid_ten2k64[scale]); + // 64 x 64 C4.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C4.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (P128, C4.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C4.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); } } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C4 - __mul_128x64_to_128 (P128, __bid_ten2k64[scale], C4); + // 64 x 128 ten2k64[scale] * C4 + __mul_128x64_to_128 (P128, ten2k64[scale], C4); } C4.w[0] = P128.w[0]; C4.w[1] = P128.w[1]; @@ -2237,15 +2346,15 @@ delta_ge_zero: res.w[0] = C3.w[0]; } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits if (scale <= 19) { // 10^scale fits in 64 bits - // 64 x 64 C3.w[0] * __bid_ten2k64[scale] - __mul_64x64_to_128MACH (res, C3.w[0], __bid_ten2k64[scale]); + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); } else { // 10^scale fits in 128 bits - // 64 x 128 C3.w[0] * __bid_ten2k128[scale - 20] - __mul_128x64_to_128 (res, C3.w[0], __bid_ten2k128[scale - 20]); + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); } } else { // z fits in 128 bits, but 10^scale must fit in 64 bits - // 64 x 128 __bid_ten2k64[scale] * C3 - __mul_128x64_to_128 (res, __bid_ten2k64[scale], C3); + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); } // e3 is already calculated e3 = e3 - scale; @@ -2260,17 +2369,17 @@ delta_ge_zero: // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 if (x0 == 0) { // this could happen only if we return to case2_repeat, or - // for Case (6) + // for Case (3) or Case (6) R128.w[1] = C4.w[1]; R128.w[0] = C4.w[0]; } else if (q4 <= 18) { // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 - __bid_round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); if (incr_exp) { // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 - R64 = __bid_ten2k64[q4 - x0]; + R64 = ten2k64[q4 - x0]; } R128.w[1] = 0; R128.w[0] = R64; @@ -2278,18 +2387,18 @@ delta_ge_zero: // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; - __bid_round128_19_38 (q4, x0, P128, &R128, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q4, x0, P128, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); if (incr_exp) { // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 - R128.w[0] = __bid_ten2k64[q4 - x0]; + R128.w[0] = ten2k64[q4 - x0]; // R128.w[1] stays 0 } else { // 20 <= q4 - x0 <= 37 - R128.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; - R128.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; + R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; } } } else if (q4 <= 57) { @@ -2297,20 +2406,20 @@ delta_ge_zero: P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; - __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); // R192.w[2] is always 0 if (incr_exp) { // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 - R192.w[0] = __bid_ten2k64[q4 - x0]; + R192.w[0] = ten2k64[q4 - x0]; // R192.w[1] stays 0 // R192.w[2] stays 0 } else { // 20 <= q4 - x0 <= 33 - R192.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; - R192.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; + R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; // R192.w[2] stays 0 } } @@ -2318,21 +2427,21 @@ delta_ge_zero: R128.w[0] = R192.w[0]; } else { // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 - __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); // R256.w[3] and R256.w[2] are always 0 if (incr_exp) { // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 - R256.w[0] = __bid_ten2k64[q4 - x0]; + R256.w[0] = ten2k64[q4 - x0]; // R256.w[1] stays 0 // R256.w[2] stays 0 // R256.w[3] stays 0 } else { // 20 <= q4 - x0 <= 33 - R256.w[0] = __bid_ten2k128[q4 - x0 - 20].w[0]; - R256.w[1] = __bid_ten2k128[q4 - x0 - 20].w[1]; + R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; // R256.w[2] stays 0 // R256.w[3] stays 0 } @@ -2352,7 +2461,7 @@ delta_ge_zero: // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 if (res.w[1] > 0x0001ed09bead87c0ull || (res.w[1] == 0x0001ed09bead87c0ull && - res.w[0] > 0x378d8e63ffffffffull)) { + res.w[0] > 0x378d8e63ffffffffull)) { // avoid double rounding error is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; @@ -2364,10 +2473,10 @@ delta_ge_zero: is_midpoint_gt_even = 0; P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; - __bid_round128_19_38 (35, 1, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (35, 1, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); // incr_exp is 0 with certainty in this case // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && @@ -2391,8 +2500,8 @@ delta_ge_zero: is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint - && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint + && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -2402,14 +2511,16 @@ delta_ge_zero: is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 + || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 + || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -2423,7 +2534,7 @@ delta_ge_zero: if (!is_midpoint_lt_even && !is_midpoint_gt_even && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || - is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { + is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { is_inexact_lt_midpoint = 1; } } @@ -2438,14 +2549,14 @@ delta_ge_zero: is_midpoint_lt_even = 1; res.w[0]++; if (res.w[0] == 0x0) - res.w[1]++; + res.w[1]++; // check for rounding overflow if (res.w[1] == 0x0001ed09bead87c0ull && - res.w[0] == 0x378d8e6400000000ull) { - // res = 10^34 => rounding overflow - res.w[1] = 0x0000314dc6448d93ull; - res.w[0] = 0x38c15b0a00000000ull; // 10^33 - e3++; + res.w[0] == 0x378d8e6400000000ull) { + // res = 10^34 => rounding overflow + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b0a00000000ull; // 10^33 + e3++; } } else if (is_midpoint_lt_even) { // res = res - 1 @@ -2453,18 +2564,23 @@ delta_ge_zero: is_midpoint_gt_even = 1; res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) - res.w[1]--; + res.w[1]--; // if the result is pure zero, the sign depends on the rounding // mode (x*y and z had opposite signs) if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { - if (rnd_mode != ROUNDING_DOWN) - z_sign = 0x0000000000000000ull; - else - z_sign = 0x8000000000000000ull; - // the exponent is max (e3, expmin) - res.w[1] = 0x0; - res.w[0] = 0x0; - BID_RETURN (res) + if (rnd_mode != ROUNDING_DOWN) + z_sign = 0x0000000000000000ull; + else + z_sign = 0x8000000000000000ull; + // the exponent is max (e3, expmin) + res.w[1] = 0x0; + res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) } } else { ; @@ -2482,11 +2598,13 @@ delta_ge_zero: res.w[1]--; // borrow // if res < 10^33 and exp > expmin need to decrease x0 and // increase scale by 1 - if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || - (res.w[1] == 0x0000314dc6448d93ull && - res.w[0] < 0x38c15b0a00000000ull)) || - (is_inexact_lt_midpoint && res.w[1] == 0x0000314dc6448d93ull && - res.w[0] == 0x38c15b0a00000000ull)) && x0 >= 1) { + if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || + (res.w[1] == 0x0000314dc6448d93ull && + res.w[0] < 0x38c15b0a00000000ull)) || + (is_inexact_lt_midpoint + && res.w[1] == 0x0000314dc6448d93ull + && res.w[0] == 0x38c15b0a00000000ull)) + && x0 >= 1) { x0 = x0 - 1; // first restore e3, otherwise it will be too small e3 = e3 + scale; @@ -2498,7 +2616,7 @@ delta_ge_zero: incr_exp = 0; goto case2_repeat; } - // else this is the result rounded with unbounded exponent + // else this is the result rounded with unbounded exponent; // because the result has opposite sign to that of C4 which was // rounded, need to change the rounding indicators if (is_inexact_lt_midpoint) { @@ -2525,7 +2643,7 @@ delta_ge_zero: res.w[1]++; // check for rounding overflow if (res.w[1] == 0x0001ed09bead87c0ull && - res.w[0] == 0x378d8e6400000000ull) { + res.w[0] == 0x378d8e6400000000ull) { // res = 10^34 => rounding overflow res.w[1] = 0x0000314dc6448d93ull; res.w[0] = 0x38c15b0a00000000ull; // 10^33 @@ -2540,12 +2658,17 @@ delta_ge_zero: // mode (x*y and z had opposite signs) if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { if (rnd_mode != ROUNDING_DOWN) - z_sign = 0x0000000000000000ull; + z_sign = 0x0000000000000000ull; else - z_sign = 0x8000000000000000ull; + z_sign = 0x8000000000000000ull; // the exponent is max (e3, expmin) res.w[1] = 0x0; res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } } else { @@ -2556,7 +2679,13 @@ delta_ge_zero: } } // check for underflow - if (e3 < expmin) { + if (e3 == expmin) { // and if significand < 10^33 => result is tiny + if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || + ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && + res.w[0] < 0x38c15b0a00000000ull)) { + is_tiny = 1; + } + } else if (e3 < expmin) { // the result is tiny, so we must truncate more of res is_tiny = 1; x0 = expmin - e3; @@ -2572,21 +2701,21 @@ delta_ge_zero: if (res.w[1] == 0x0) { // between 1 and 19 digits for (ind = 1; ind <= 19; ind++) { - if (res.w[0] < __bid_ten2k64[ind]) { + if (res.w[0] < ten2k64[ind]) { break; } } // ind digits - } else if ((res.w[1] < __bid_ten2k128[0].w[1] || - (res.w[1] == __bid_ten2k128[0].w[1] - && res.w[0] < __bid_ten2k128[0].w[0]))) { + } else if (res.w[1] < ten2k128[0].w[1] || + (res.w[1] == ten2k128[0].w[1] + && res.w[0] < ten2k128[0].w[0])) { // 20 digits ind = 20; } else { // between 21 and 38 digits for (ind = 1; ind <= 18; ind++) { - if (res.w[1] < __bid_ten2k128[ind].w[1] || - (res.w[1] == __bid_ten2k128[ind].w[1] && - res.w[0] < __bid_ten2k128[ind].w[0])) { + if (res.w[1] < ten2k128[ind].w[1] || + (res.w[1] == ten2k128[ind].w[1] && + res.w[0] < ten2k128[ind].w[0])) { break; } } @@ -2595,7 +2724,7 @@ delta_ge_zero: } // at this point ind >= x0; because delta >= 2 on this path, the case - // ind = x0 cab occur only in Case (2) or case (3), when C3 has one + // ind = x0 can occur only in Case (2) or case (3), when C3 has one // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), // the signs of x * y and z are opposite, and through cancellation // the most significant decimal digit in res has the weight @@ -2611,13 +2740,13 @@ delta_ge_zero: is_inexact_gt_midpoint = 1; } else if (ind <= 18) { // check that 2 <= ind // 2 <= ind <= 18, 1 <= x0 <= 17 - __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); if (incr_exp) { - // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 - R64 = __bid_ten2k64[q4 - x0]; + // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 + R64 = ten2k64[ind - x0]; } res.w[1] = 0; res.w[0] = R64; @@ -2625,18 +2754,18 @@ delta_ge_zero: // 19 <= ind <= 38 P128.w[1] = res.w[1]; P128.w[0] = res.w[0]; - __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); if (incr_exp) { - // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 + // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 - res.w[0] = __bid_ten2k64[ind - x0]; + res.w[0] = ten2k64[ind - x0]; // res.w[1] stays 0 } else { // 20 <= ind - x0 <= 37 - res.w[0] = __bid_ten2k128[ind - x0 - 20].w[0]; - res.w[1] = __bid_ten2k128[ind - x0 - 20].w[1]; + res.w[0] = ten2k128[ind - x0 - 20].w[0]; + res.w[1] = ten2k128[ind - x0 - 20].w[1]; } } } @@ -2662,7 +2791,7 @@ delta_ge_zero: is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -2672,14 +2801,14 @@ delta_ge_zero: is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -2697,6 +2826,8 @@ delta_ge_zero: is_inexact_lt_midpoint = 1; } } + } else { + ; // not underflow } // check for inexact result if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || @@ -2723,11 +2854,16 @@ delta_ge_zero: } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e3, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else { @@ -2758,9 +2894,9 @@ delta_ge_zero: ind = e3; e3 = e4; e4 = ind; - tmp64 = z_sign; + tmp_sign = z_sign; z_sign = p_sign; - p_sign = tmp64; + p_sign = tmp_sign; } else { // from Cases (2), (3), (4), (5) // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be // scaled up by q4 + delta - q3; this is the same as in Cases (15), @@ -2768,9 +2904,14 @@ delta_ge_zero: delta = -delta; } add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, - rnd_mode, &is_midpoint_lt_even, - &is_midpoint_gt_even, &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint, pfpsf, &res); + rnd_mode, &is_midpoint_lt_even, + &is_midpoint_gt_even, &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, pfpsf, &res); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } @@ -2787,25 +2928,25 @@ delta_ge_zero: if (q4 <= 38) { P128.w[1] = C4.w[1]; P128.w[0] = C4.w[0]; - __bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q4, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } else if (q4 <= 57) { // 35 <= q4 <= 57 P192.w[2] = C4.w[2]; P192.w[1] = C4.w[1]; P192.w[0] = C4.w[0]; - __bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); res.w[0] = R192.w[0]; res.w[1] = R192.w[1]; } else { // if (q4 <= 68) - __bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); res.w[0] = R256.w[0]; res.w[1] = R256.w[1]; } @@ -2833,34 +2974,34 @@ delta_ge_zero: is_inexact_gt_midpoint = 1; } else { // if (delta == p34 + 1) if (q3 <= 19) { - if (C3.w[0] < __bid_midpoint64[q3 - 1]) { // C3 < 1/2 ulp - // res = 10^33, unchanged - is_inexact_gt_midpoint = 1; - } else if (C3.w[0] == __bid_midpoint64[q3 - 1]) { // C3 = 1/2 ulp - // res = 10^33, unchanged - is_midpoint_lt_even = 1; - } else { // if (C3.w[0] > __bid_midpoint64[q3-1]), C3 > 1/2 ulp - res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 - res.w[0] = 0x378d8e63ffffffffull; - e4 = e4 - 1; - is_inexact_lt_midpoint = 1; - } + if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp + // res = 10^33, unchanged + is_inexact_gt_midpoint = 1; + } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp + // res = 10^33, unchanged + is_midpoint_lt_even = 1; + } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4 = e4 - 1; + is_inexact_lt_midpoint = 1; + } } else { // if (20 <= q3 <=34) - if (C3.w[1] < __bid_midpoint128[q3 - 20].w[1] || - (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && - C3.w[0] < __bid_midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp - // res = 10^33, unchanged - is_inexact_gt_midpoint = 1; - } else if (C3.w[1] == __bid_midpoint128[q3 - 20].w[1] && - C3.w[0] == __bid_midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp - // res = 10^33, unchanged - is_midpoint_lt_even = 1; - } else { // if (C3 > __bid_midpoint128[q3-20]), C3 > 1/2 ulp - res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 - res.w[0] = 0x378d8e63ffffffffull; - e4 = e4 - 1; - is_inexact_lt_midpoint = 1; - } + if (C3.w[1] < midpoint128[q3 - 20].w[1] || + (C3.w[1] == midpoint128[q3 - 20].w[1] && + C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp + // res = 10^33, unchanged + is_inexact_gt_midpoint = 1; + } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && + C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp + // res = 10^33, unchanged + is_midpoint_lt_even = 1; + } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4 = e4 - 1; + is_inexact_lt_midpoint = 1; + } } } } @@ -2911,16 +3052,21 @@ delta_ge_zero: } if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); } if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) { // set the inexact flag *pfpsf |= INEXACT_EXCEPTION; } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((q4 <= p34 && p34 <= delta) || // Case (8) @@ -2952,12 +3098,12 @@ delta_ge_zero: ind = e3; e3 = e4; e4 = ind; - tmp64 = z_sign; + tmp_sign = z_sign; z_sign = p_sign; - p_sign = tmp64; - tmp64 = z_exp; + p_sign = tmp_sign; + tmp.ui64 = z_exp; z_exp = p_exp; - p_exp = tmp64; + p_exp = tmp.ui64; goto delta_ge_zero; } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) @@ -2967,16 +3113,16 @@ delta_ge_zero: // 1 <= x0 <= q3 - 1 <= p34 - 1 x0 = e4 - e3; // or x0 = delta + q3 - q4 if (q3 <= 18) { // 2 <= q3 <= 18 - __bid_round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); // C3.w[1] = 0; C3.w[0] = R64; } else if (q3 <= 38) { - __bid_round128_19_38 (q3, x0, C3, &R128, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (q3, x0, C3, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); C3.w[1] = R128.w[1]; C3.w[0] = R128.w[0]; } @@ -2987,7 +3133,7 @@ delta_ge_zero: // 64 x 128 -> 128 P128.w[1] = C3.w[1]; P128.w[0] = C3.w[0]; - __mul_64x128_to_128 (C3, __bid_ten2k64[1], P128); + __mul_64x128_to_128 (C3, ten2k64[1], P128); } e3 = e3 + x0; // this is e4 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; @@ -3026,10 +3172,10 @@ delta_ge_zero: if (R256.w[0] == 0x0) { R256.w[1]++; if (R256.w[1] == 0x0) { - R256.w[2]++; - if (R256.w[2] == 0x0) { - R256.w[3]++; - } + R256.w[2]++; + if (R256.w[2] == 0x0) { + R256.w[3]++; + } } } // no check for rounding overflow - R256 was a difference @@ -3039,10 +3185,10 @@ delta_ge_zero: if (R256.w[0] == 0xffffffffffffffffull) { R256.w[1]--; if (R256.w[1] == 0xffffffffffffffffull) { - R256.w[2]--; - if (R256.w[2] == 0xffffffffffffffffull) { - R256.w[3]--; - } + R256.w[2]--; + if (R256.w[2] == 0xffffffffffffffffull) { + R256.w[3]--; + } } } } else { @@ -3053,7 +3199,7 @@ delta_ge_zero: } } // determine the number of decimal digits in R256 - ind = __bid_nr_digits256 (R256); // ind >= p34 + ind = nr_digits256 (R256); // ind >= p34 // if R256 is sum, then ind > p34; if R256 is a difference, then // ind >= p34; this means that we can calculate the result rounded to // the destination precision, with unbounded exponent, starting from R256 @@ -3086,25 +3232,25 @@ delta_ge_zero: if (ind <= 38) { P128.w[1] = R256.w[1]; P128.w[0] = R256.w[0]; - __bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (ind, x0, P128, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } else if (ind <= 57) { P192.w[2] = R256.w[2]; P192.w[1] = R256.w[1]; P192.w[0] = R256.w[0]; - __bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round192_39_57 (ind, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); R128.w[1] = R192.w[1]; R128.w[0] = R192.w[0]; } else { // if (ind <= 68) - __bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round256_58_76 (ind, x0, R256, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); R128.w[1] = R256.w[1]; R128.w[0] = R256.w[0]; } @@ -3129,7 +3275,7 @@ delta_ge_zero: // not possible in Cases (2)-(6) or (15)-(17) which may get here // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent if (res.w[1] == 0x0000314dc6448d93ull && - res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 + res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 res.w[0] = 0x378d8e63ffffffffull; e4--; @@ -3143,7 +3289,7 @@ delta_ge_zero: is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -3153,14 +3299,14 @@ delta_ge_zero: is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -3183,10 +3329,10 @@ delta_ge_zero: P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; P128.w[0] = res.w[0]; rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - 0, &P128, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + 0, &P128, pfpsf); scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 // the number of digits in the significand is p34 = 34 if (e4 + scale < expmin) { @@ -3213,8 +3359,13 @@ delta_ge_zero: res.w[1] = p_sign | 0x7800000000000000ull; res.w[0] = 0x0000000000000000ull; *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) - } // else not overflow or not RN, so continue + } // else not overflow or not RN, so continue // from this point on this is similar to the last part of the computation // for Cases (15), (16), (17) @@ -3249,10 +3400,10 @@ delta_ge_zero: R128.w[1] = res.w[1] & MASK_COEFF; R128.w[0] = res.w[0]; if (ind <= 19) { - if (R128.w[0] < __bid_midpoint64[ind - 1]) { // < 1/2 ulp + if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (R128.w[0] == __bid_midpoint64[ind - 1]) { // = 1/2 ulp + } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -3260,13 +3411,13 @@ delta_ge_zero: is_inexact_gt_midpoint = 1; } } else { // if (ind <= 38) - if (R128.w[1] < __bid_midpoint128[ind - 20].w[1] || - (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && - R128.w[0] < __bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp + if (R128.w[1] < midpoint128[ind - 20].w[1] || + (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp lt_half_ulp = 1; is_inexact_lt_midpoint = 1; - } else if (R128.w[1] == __bid_midpoint128[ind - 20].w[1] && - R128.w[0] == __bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp + } else if (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp eq_half_ulp = 1; is_midpoint_gt_even = 1; } else { // > 1/2 ulp @@ -3289,19 +3440,19 @@ delta_ge_zero: // round the ind-digit result to ind - x0 digits if (ind <= 18) { // 2 <= ind <= 18 - __bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); res.w[1] = 0x0; res.w[0] = R64; } else if (ind <= 38) { P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; - __bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); } e4 = e4 + x0; // expmin // we want the exponent to be expmin, so if incr_exp = 1 then @@ -3310,13 +3461,14 @@ delta_ge_zero: // 64 x 128 -> 128 P128.w[1] = res.w[1] & MASK_COEFF; P128.w[0] = res.w[0]; - __mul_64x128_to_128 (res, __bid_ten2k64[1], P128); + __mul_64x128_to_128 (res, ten2k64[1], P128); } res.w[1] = - p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); + p_sign | ((UINT64) (e4 + 6176) << 49) | (res. + w[1] & MASK_COEFF); // avoid a double rounding error if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && - is_midpoint_lt_even) { // double rounding error upward + is_midpoint_lt_even) { // double rounding error upward // res = res - 1 res.w[0]--; if (res.w[0] == 0xffffffffffffffffull) @@ -3328,7 +3480,7 @@ delta_ge_zero: is_midpoint_lt_even = 0; is_inexact_lt_midpoint = 1; } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && - is_midpoint_gt_even) { // double rounding error downward + is_midpoint_gt_even) { // double rounding error downward // res = res + 1 res.w[0]++; if (res.w[0] == 0) @@ -3336,7 +3488,8 @@ delta_ge_zero: is_midpoint_gt_even = 0; is_inexact_gt_midpoint = 1; } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && - !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + !is_inexact_lt_midpoint + && !is_inexact_gt_midpoint) { // if this second rounding was exact the result may still be // inexact because of the first rounding if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { @@ -3346,14 +3499,16 @@ delta_ge_zero: is_inexact_lt_midpoint = 1; } } else if (is_midpoint_gt_even && - (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + (is_inexact_gt_midpoint0 + || is_midpoint_lt_even0)) { // pulled up to a midpoint is_inexact_lt_midpoint = 1; is_inexact_gt_midpoint = 0; is_midpoint_lt_even = 0; is_midpoint_gt_even = 0; } else if (is_midpoint_lt_even && - (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + (is_inexact_lt_midpoint0 + || is_midpoint_gt_even0)) { // pulled down to a midpoint is_inexact_lt_midpoint = 0; is_inexact_gt_midpoint = 1; @@ -3368,10 +3523,10 @@ delta_ge_zero: // apply correction if not rounding to nearest if (rnd_mode != ROUNDING_TO_NEAREST) { rounding_correction (rnd_mode, - is_inexact_lt_midpoint, - is_inexact_gt_midpoint, - is_midpoint_lt_even, is_midpoint_gt_even, - e4, &res, pfpsf); + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); } if (is_midpoint_lt_even || is_midpoint_gt_even || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { @@ -3380,7 +3535,11 @@ delta_ge_zero: if (is_tiny) *pfpsf |= UNDERFLOW_EXCEPTION; } - + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) @@ -3391,18 +3550,916 @@ delta_ge_zero: // unbounded exponent add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, - rnd_mode, &is_midpoint_lt_even, - &is_midpoint_gt_even, &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint, pfpsf, &res); - + rnd_mode, &is_midpoint_lt_even, + &is_midpoint_gt_even, &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, pfpsf, &res); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } else { ; } - } // end if delta < 0 + } // end if delta < 0 + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); BID_RETURN (res) } + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT128 x = *px, y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128_fma (UINT128 x, UINT128 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even, is_midpoint_gt_even, + is_inexact_lt_midpoint, is_inexact_gt_midpoint; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + +#if DECIMAL_CALL_BY_REFERENCE + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, &x, &y, &z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x, y, + z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 x1, y1, z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, &x1, &y1, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x1, y1, + z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, y = *py; + UINT128 z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 x1, y1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, &x1, &y1, &z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x1, y1, + z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 x1, z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, &x1, py, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x1, y, + z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 x1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, &x1, py, pz + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x1, y, + z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 y1, z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, px, &y1, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x, y1, + z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 y1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, px, &y1, pz + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x, y1, + z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, + &res, px, py, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, x, y, + z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} + +// Note: bid128qqq_fma is represented by bid128_fma + +// Note: bid64ddd_fma is represented by bid64_fma + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 x1, y1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, &x1, &y1, pz + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x1, y1, z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 x1, z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, &x1, py, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x1, y, z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 x1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, &x1, py, pz + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x1, y, z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 y1, z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, px, &y1, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x, y1, z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 y1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, px, &y1, pz + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x, y1, z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res1 = 0xbaddbaddbaddbaddull; + UINT128 z1; + +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qqq_fma (&res1, px, py, &z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res1 = bid64qqq_fma (x, y, z1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res1); +} + + +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT128 x = *px, y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0, + is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, + is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + int incr_exp; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT64 res1 = 0xbaddbaddbaddbaddull; + unsigned int save_fpsf; // needed because of the call to bid128_ext_fma + UINT64 sign; + UINT64 exp; + int unbexp; + UINT128 C; + BID_UI64DOUBLE tmp; + int nr_bits; + int q, x0; + int scale; + int lt_half_ulp = 0, eq_half_ulp = 0; + + // Note: for rounding modes other than RN or RA, the result can be obtained + // by rounding first to BID128 and then to BID64 + + save_fpsf = *pfpsf; // sticky bits - caller value must be preserved + *pfpsf = 0; + +#if DECIMAL_CALL_BY_REFERENCE + bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, + &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0, + &res, &x, &y, &z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, + &is_inexact_lt_midpoint0, + &is_inexact_gt_midpoint0, x, y, + z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + + if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) || + (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible + ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN) + ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity +#if DECIMAL_CALL_BY_REFERENCE + bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); +#else + res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); +#endif + // determine the unbiased exponent of the result + unbexp = ((res1 >> 53) & 0x3ff) - 398; + + // if subnormal, res1 must have exp = -398 + // if tiny and inexact set underflow and inexact status flags + if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN + (unbexp == -398) + && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull) + && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 + || is_midpoint_lt_even0 || is_midpoint_gt_even0)) { + // set the inexact flag and the underflow flag + *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); + } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || + is_midpoint_lt_even0 || is_midpoint_gt_even0) { + // set the inexact flag and the underflow flag + *pfpsf |= INEXACT_EXCEPTION; + } + + *pfpsf |= save_fpsf; + BID_RETURN (res1); + } // else continue, and use rounding to nearest to round to 16 digits + + // at this point the result is rounded to nearest (even or away) to 34 digits + // (or less if exact), and it is zero or finite non-zero canonical [sub]normal + sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits + unbexp = (exp >> 49) - 6176; + C.w[1] = res.w[HIGH_128W] & MASK_COEFF; + C.w[0] = res.w[LOW_128W]; + + if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero + (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { + // clear under/overflow +#if DECIMAL_CALL_BY_REFERENCE + bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); +#else + res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); +#endif + *pfpsf |= save_fpsf; + BID_RETURN (res1); + } // else continue + + // -398 - 34 <= unbexp <= 369 + 15 + if (rnd_mode == ROUNDING_TIES_AWAY) { + // apply correction, if needed, to make the result rounded to nearest-even + if (is_midpoint_gt_even) { + // res = res - 1 + res1--; // res1 is now even + } // else the result is already correctly rounded to nearest-even + } + // at this point the result is finite, non-zero canonical normal or subnormal, + // and in most cases overflow or underflow will not occur + + // determine the number of digits q in the result + // q = nr. of decimal digits in x + // determine first the nr. of bits in x + if (C.w[1] == 0) { + if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp.d = (double) (C.w[0] >> 32); // exact conversion + nr_bits = + 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp.d = (double) (C.w[0]); // exact conversion + nr_bits = + 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp.d = (double) C.w[0]; // exact conversion + nr_bits = + 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1]) + tmp.d = (double) C.w[1]; // exact conversion + nr_bits = + 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + q = nr_digits[nr_bits - 1].digits; + if (q == 0) { + q = nr_digits[nr_bits - 1].digits1; + if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi || + (C.w[1] == nr_digits[nr_bits - 1].threshold_hi && + C.w[0] >= nr_digits[nr_bits - 1].threshold_lo)) + q++; + } + // if q > 16, round to nearest even to 16 digits (but for underflow it may + // have to be truncated even more) + if (q > 16) { + x0 = q - 16; + if (q <= 18) { + round64_2_18 (q, x0, C.w[0], &res1, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + } else { // 19 <= q <= 34 + round128_19_38 (q, x0, C, &res128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + res1 = res128.w[0]; // the result fits in 64 bits + } + unbexp = unbexp + x0; + if (incr_exp) + unbexp++; + q = 16; // need to set in case denormalization is necessary + } else { + // the result does not require a second rounding (and it must have + // been exact in the first rounding, since q <= 16) + res1 = C.w[0]; + } + + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res1--; // res1 becomes odd + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1 + res1 = 0x002386f26fc0ffffull; // 10^16 - 1 + unbexp--; + } + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res1++; // res1 becomes odd (so it cannot be 10^16) + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + // this is the result rounded correctly to nearest even, with unbounded exp. + + // check for overflow + if (q + unbexp > P16 + expmax16) { + res1 = sign | 0x7800000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + *pfpsf |= save_fpsf; + BID_RETURN (res1) + } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16 + // not overflow; the result must be exact, and we can multiply res1 by + // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits + scale = unbexp - expmax16; + res1 = res1 * ten2k64[scale]; // res1 * 10^scale + unbexp = expmax16; // unbexp - scale + } else { + ; // continue + } + + // check for underflow + if (q + unbexp < P16 + expmin16) { + if (unbexp < expmin16) { + // we must truncate more of res + x0 = expmin16 - unbexp; // x0 >= 1 + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = is_midpoint_gt_even; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + // the number of decimal digits in res1 is q + if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits + // 2 <= q <= 16, 1 <= x0 <= 15 + round64_2_18 (q, x0, res1, &res1, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + if (incr_exp) { + // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15 + res1 = ten2k64[q - x0]; + } + unbexp = unbexp + x0; // expmin16 + } else if (x0 == q) { + // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin + // determine relationship with 1/2 ulp + // q <= 16 + if (res1 < midpoint64[q - 1]) { // < 1/2 ulp + lt_half_ulp = 1; + is_inexact_lt_midpoint = 1; + } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp + eq_half_ulp = 1; + is_midpoint_gt_even = 1; + } else { // > 1/2 ulp + // gt_half_ulp = 1; + is_inexact_gt_midpoint = 1; + } + if (lt_half_ulp || eq_half_ulp) { + // res = +0.0 * 10^expmin16 + res1 = 0x0000000000000000ull; + } else { // if (gt_half_ulp) + // res = +1 * 10^expmin16 + res1 = 0x0000000000000001ull; + } + unbexp = expmin16; + } else { // if (x0 > q) + // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin + res1 = 0x0000000000000000ull; + unbexp = expmin16; + is_inexact_lt_midpoint = 1; + } + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res1--; // res1 becomes odd + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res1++; // res1 becomes odd + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if this rounding was exact the result may still be + // inexact because of the previous roundings + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + } + // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16) + // and the result is tiny and exact + + // check for inexact result + if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || + is_midpoint_lt_even || is_midpoint_gt_even || + is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || + is_midpoint_lt_even0 || is_midpoint_gt_even0) { + // set the inexact flag and the underflow flag + *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); + } + } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || + is_midpoint_lt_even || is_midpoint_gt_even) { + *pfpsf |= INEXACT_EXCEPTION; + } + // this is the result rounded correctly to nearest, with bounded exponent + + if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction + // res = res + 1 + res1++; // res1 is now odd + } // else the result is already correct + + // assemble the result + if (res1 < 0x0020000000000000ull) { // res < 2^53 + res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1; + } else { // res1 >= 2^53 + res1 = sign | MASK_STEERING_BITS | + ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2); + } + *pfpsf |= save_fpsf; + BID_RETURN (res1); +} |