diff options
author | Andy Ross <andrew.j.ross@intel.com> | 2016-08-15 11:31:58 -0700 |
---|---|---|
committer | Anas Nashif <nashif@linux.intel.com> | 2016-09-10 00:49:00 +0000 |
commit | d06eea4eda04f0eb6761a8216a57902d8278a1fb (patch) | |
tree | 43a6a5b121fe7776ffd7c1b76183802e838e19f8 /lib | |
parent | 073cfddd0f0a35bbb6212465e0d9f3445f596011 (diff) |
libc/printf: Use compiler-provided 64 bit math, phase 1
The _to_float() implementation had a somewhat kludgey hand-written 64
bit math implementation, which is unhelpful on Zephyr as all our
toolchains provide a working uint64_t runtime. This is at best just
duplicated code from libgcc, and at worst less efficient.
This patch replaces the existing 64 bit minilibrary but keeps the
uint32_t[2] API as is for ease of validation and review.
One exception is _ldiv5, a specialized divide-by-five implementation.
The 64 bit division routines are large on some architectures (ARM and
ARC in particular), not pulled in by a default Zephyr build, and will
swamp the benefit from this patch. So this includes a
refactored/improved _ldiv5 which leverages libgcc for multiword shifts
instead of just using raw division.
Note also the "noinline" attribute on _ladd(). This is a workaround
for an apparent compiler bug when built with -Og or -Os (hand-hacking
the Makefiles to build with -O0 works), perhaps due to my aliasing the
int array with a long long. This will go away in phase 2.
Change-Id: I63e8c82dabe2bfaa75b63ddb59e5f11d51be538e
Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libc/minimal/source/stdout/prf.c | 130 |
1 files changed, 43 insertions, 87 deletions
diff --git a/lib/libc/minimal/source/stdout/prf.c b/lib/libc/minimal/source/stdout/prf.c index 2e731759a..0d21dd0d3 100644 --- a/lib/libc/minimal/source/stdout/prf.c +++ b/lib/libc/minimal/source/stdout/prf.c @@ -139,113 +139,69 @@ static int _to_dec(char *buf, int32_t value, int fplus, int fspace, int precisio static void _llshift(uint32_t value[]) { - if (value[0] & 0x80000000) - value[1] = (value[1] << 1) | 1; - else - value[1] <<= 1; - value[0] <<= 1; + *((uint64_t *)&value[0]) <<= 1; } -static void _lrshift(uint32_t value[]) +static void __attribute__((noinline)) _ladd(uint32_t *result, uint32_t *value) { - if (value[1] & 1) - value[0] = (value[0] >> 1) | 0x80000000; - else - value[0] = (value[0] >> 1) & 0x7FFFFFFF; - value[1] = (value[1] >> 1) & 0x7FFFFFFF; + uint64_t *r = (uint64_t *)&result[0]; + uint64_t *v = (uint64_t *)&value[0]; + *r = *r + *v; } -static void _ladd(uint32_t result[], uint32_t value[]) +static void _rlrshift(uint32_t value[]) { - uint32_t carry; - uint32_t temp; - - carry = 0; - temp = result[0] + value[0]; - if (result[0] & 0x80000000) { - if ((value[0] & 0x80000000) || ((temp & 0x80000000) == 0)) - carry = 1; - } else { - if ((value[0] & 0x80000000) && ((temp & 0x80000000) == 0)) - carry = 1; - } - result[0] = temp; - result[1] = result[1] + value[1] + carry; + uint64_t *v = (uint64_t *)&value[0]; + *v = (*v & 1) + (*v >> 1); } -static void _rlrshift(uint32_t value[]) +/* Tiny integer divide-by-five routine. The full 64 bit division + * implementations in libgcc are very large on some architectures, and + * currently nothing in Zephyr pulls it into the link. So it makes + * sense to define this much smaller special case here to avoid + * including it just for printf. + * + * It works by iteratively dividing the most significant 32 bits of + * the 64 bit value by 5. This will leave a remainder of 0-4 + * (i.e. three significant bits), ensuring that the top 29 bits of the + * remainder are zero for the next iteration. 
Thus in the second + * iteration only 35 significant bits remain, and in the third only + * six. This was tested exhaustively through the first ~10B values in + * the input space, and for ~2e12 (4 hours runtime) random inputs + * taken from the full 64 bit space. + */ +static void _ldiv5(uint32_t value[]) { - uint32_t temp[2]; - - temp[0] = value[0] & 1; - temp[1] = 0; - _lrshift(value); - _ladd(value, temp); -} + uint64_t *v = (uint64_t *)&value[0]; + uint32_t i, hi; + uint64_t rem = *v, quot = 0, q; + static const char shifts[] = { 32, 3, 0 }; - /* - * 64 bit divide by 5 function for _to_float. - * The result is ROUNDED, not TRUNCATED. - */ + /* Usage in this file wants rounded behavior, not truncation. So add + * two to get the threshold right. + */ + rem += 2; -static void _ldiv5(uint32_t value[]) -{ - uint32_t result[2]; - register int shift; - uint32_t temp1[2]; - uint32_t temp2[2]; - - result[0] = 0; /* Result accumulator */ - result[1] = value[1] / 5; - temp1[0] = value[0]; /* Dividend for this pass */ - temp1[1] = value[1] % 5; - temp2[1] = 0; - - while (1) { - for (shift = 0; temp1[1] != 0; shift++) - _lrshift(temp1); - temp2[0] = temp1[0] / 5; - if (temp2[0] == 0) { - if (temp1[0] % 5 > (5 / 2)) { - temp1[0] = 1; - _ladd(result, temp1); - } - break; - } - temp1[0] = temp2[0]; - while (shift-- != 0) - _llshift(temp1); - _ladd(result, temp1); /* Update result accumulator */ - temp1[0] = result[0]; - temp1[1] = result[1]; - _llshift(temp1); /* Compute (current_result*5) */ - _llshift(temp1); - _ladd(temp1, result); - temp1[0] = ~temp1[0]; /* Compute -(current_result*5) */ - temp1[1] = ~temp1[1]; - temp2[0] = 1; - _ladd(temp1, temp2); - _ladd(temp1, value); /* Compute #-(current_result*5) */ + for (i = 0; i < 3; i++) { + hi = rem >> shifts[i]; + q = (uint64_t)(hi / 5) << shifts[i]; + rem -= q * 5; + quot += q; } - value[0] = result[0]; - value[1] = result[1]; + + *v = quot; } static char _get_digit(uint32_t fract[], int *digit_count) { int rval; - uint32_t 
temp[2]; + uint64_t *fr = (uint64_t *)&fract[0]; if (*digit_count > 0) { *digit_count -= 1; - temp[0] = fract[0]; - temp[1] = fract[1]; - _llshift(fract); /* Multiply by 10 */ - _llshift(fract); - _ladd(fract, temp); - _llshift(fract); - rval = ((fract[1] >> 28) & 0xF) + '0'; - fract[1] &= 0x0FFFFFFF; + *fr = *fr * 10; + rval = ((*fr >> 60) & 0xF) + '0'; + *fr &= 0x0FFFFFFFFFFFFFFFull; } else rval = '0'; return (char) (rval); |