summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAndy Ross <andrew.j.ross@intel.com>2016-08-15 11:31:58 -0700
committerAnas Nashif <nashif@linux.intel.com>2016-09-10 00:49:00 +0000
commitd06eea4eda04f0eb6761a8216a57902d8278a1fb (patch)
tree43a6a5b121fe7776ffd7c1b76183802e838e19f8 /lib
parent073cfddd0f0a35bbb6212465e0d9f3445f596011 (diff)
libc/printf: Use compiler-provided 64 bit math, phase 1
The _to_float() implementation had a somewhat kludgey hand-written 64 bit math implementation, which is unhelpful on Zephyr as all our toolchains provide a working uint64_t runtime. This is at best just duplicated code from libgcc, and at worst less efficient. This patch replaces the existing 64 bit minilibrary but keeps the uint32_t[2] API as is for ease of validation and review. One exception is _ldiv5, a specialized divide-by-five implementation. The 64 bit division routines are large on some architectures (ARM and ARC in particular), not pulled in by a default Zephyr build, and will swamp the benefit from this patch. So this includes a refactored/improved _ldiv5 which leverages libgcc for multiword shifts instead of just using raw division. Note also the "noinline" attribute on _ladd(). This is a workaround for an apparent compiler bug when built with -Og or -Os (hand-hacking the Makefiles to build with -O0 works), perhaps due to my aliasing the int array with a long long. This will go away in phase 2. Change-Id: I63e8c82dabe2bfaa75b63ddb59e5f11d51be538e Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/minimal/source/stdout/prf.c130
1 files changed, 43 insertions, 87 deletions
diff --git a/lib/libc/minimal/source/stdout/prf.c b/lib/libc/minimal/source/stdout/prf.c
index 2e731759a..0d21dd0d3 100644
--- a/lib/libc/minimal/source/stdout/prf.c
+++ b/lib/libc/minimal/source/stdout/prf.c
@@ -139,113 +139,69 @@ static int _to_dec(char *buf, int32_t value, int fplus, int fspace, int precisio
static void _llshift(uint32_t value[])
{
- if (value[0] & 0x80000000)
- value[1] = (value[1] << 1) | 1;
- else
- value[1] <<= 1;
- value[0] <<= 1;
+ *((uint64_t *)&value[0]) <<= 1;
}
-static void _lrshift(uint32_t value[])
+static void __attribute__((noinline)) _ladd(uint32_t *result, uint32_t *value)
{
- if (value[1] & 1)
- value[0] = (value[0] >> 1) | 0x80000000;
- else
- value[0] = (value[0] >> 1) & 0x7FFFFFFF;
- value[1] = (value[1] >> 1) & 0x7FFFFFFF;
+ uint64_t *r = (uint64_t *)&result[0];
+ uint64_t *v = (uint64_t *)&value[0];
+ *r = *r + *v;
}
-static void _ladd(uint32_t result[], uint32_t value[])
+static void _rlrshift(uint32_t value[])
{
- uint32_t carry;
- uint32_t temp;
-
- carry = 0;
- temp = result[0] + value[0];
- if (result[0] & 0x80000000) {
- if ((value[0] & 0x80000000) || ((temp & 0x80000000) == 0))
- carry = 1;
- } else {
- if ((value[0] & 0x80000000) && ((temp & 0x80000000) == 0))
- carry = 1;
- }
- result[0] = temp;
- result[1] = result[1] + value[1] + carry;
+ uint64_t *v = (uint64_t *)&value[0];
+ *v = (*v & 1) + (*v >> 1);
}
-static void _rlrshift(uint32_t value[])
+/* Tiny integer divide-by-five routine. The full 64 bit division
+ * implementations in libgcc are very large on some architectures, and
+ * currently nothing in Zephyr pulls it into the link. So it makes
+ * sense to define this much smaller special case here to avoid
+ * including it just for printf.
+ *
+ * It works by iteratively dividing the most significant 32 bits of
+ * the 64 bit value by 5. This will leave a remainder of 0-4
+ * (i.e. three significant bits), ensuring that the top 29 bits of the
+ * remainder are zero for the next iteration. Thus in the second
+ * iteration only 35 significant bits remain, and in the third only
+ * six. This was tested exhaustively through the first ~10B values in
+ * the input space, and for ~2e12 (4 hours runtime) random inputs
+ * taken from the full 64 bit space.
+ */
+static void _ldiv5(uint32_t value[])
{
- uint32_t temp[2];
-
- temp[0] = value[0] & 1;
- temp[1] = 0;
- _lrshift(value);
- _ladd(value, temp);
-}
+ uint64_t *v = (uint64_t *)&value[0];
+ uint32_t i, hi;
+ uint64_t rem = *v, quot = 0, q;
+ static const char shifts[] = { 32, 3, 0 };
- /*
- * 64 bit divide by 5 function for _to_float.
- * The result is ROUNDED, not TRUNCATED.
- */
+ /* Usage in this file wants rounded behavior, not truncation. So add
+ * two to get the threshold right.
+ */
+ rem += 2;
-static void _ldiv5(uint32_t value[])
-{
- uint32_t result[2];
- register int shift;
- uint32_t temp1[2];
- uint32_t temp2[2];
-
- result[0] = 0; /* Result accumulator */
- result[1] = value[1] / 5;
- temp1[0] = value[0]; /* Dividend for this pass */
- temp1[1] = value[1] % 5;
- temp2[1] = 0;
-
- while (1) {
- for (shift = 0; temp1[1] != 0; shift++)
- _lrshift(temp1);
- temp2[0] = temp1[0] / 5;
- if (temp2[0] == 0) {
- if (temp1[0] % 5 > (5 / 2)) {
- temp1[0] = 1;
- _ladd(result, temp1);
- }
- break;
- }
- temp1[0] = temp2[0];
- while (shift-- != 0)
- _llshift(temp1);
- _ladd(result, temp1); /* Update result accumulator */
- temp1[0] = result[0];
- temp1[1] = result[1];
- _llshift(temp1); /* Compute (current_result*5) */
- _llshift(temp1);
- _ladd(temp1, result);
- temp1[0] = ~temp1[0]; /* Compute -(current_result*5) */
- temp1[1] = ~temp1[1];
- temp2[0] = 1;
- _ladd(temp1, temp2);
- _ladd(temp1, value); /* Compute #-(current_result*5) */
+ for (i = 0; i < 3; i++) {
+ hi = rem >> shifts[i];
+ q = (uint64_t)(hi / 5) << shifts[i];
+ rem -= q * 5;
+ quot += q;
}
- value[0] = result[0];
- value[1] = result[1];
+
+ *v = quot;
}
static char _get_digit(uint32_t fract[], int *digit_count)
{
int rval;
- uint32_t temp[2];
+ uint64_t *fr = (uint64_t *)&fract[0];
if (*digit_count > 0) {
*digit_count -= 1;
- temp[0] = fract[0];
- temp[1] = fract[1];
- _llshift(fract); /* Multiply by 10 */
- _llshift(fract);
- _ladd(fract, temp);
- _llshift(fract);
- rval = ((fract[1] >> 28) & 0xF) + '0';
- fract[1] &= 0x0FFFFFFF;
+ *fr = *fr * 10;
+ rval = ((*fr >> 60) & 0xF) + '0';
+ *fr &= 0x0FFFFFFFFFFFFFFFull;
} else
rval = '0';
return (char) (rval);