author    joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>  2013-10-18 21:33:25 +0000
committer joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>  2013-10-18 21:33:25 +0000
commit    fe2ed5aaa408e1ab996a9fe1595a05634208a79c (patch)
tree      e1027fbc9d8a4a8c33f8149b2b42e8cde89c74f6 /libc/sysdeps/powerpc
parent    571c782b982d888565e7d06bfc2f3d47582fe829 (diff)

Merge changes between r23946 and r24305 from /fsf/trunk.

git-svn-id: svn://svn.eglibc.org/trunk@24306 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/powerpc')
-rw-r--r-- libc/sysdeps/powerpc/bits/fenv.h | 71
-rw-r--r-- libc/sysdeps/powerpc/bits/mathinline.h | 26
-rw-r--r-- libc/sysdeps/powerpc/ffs.c | 1
-rw-r--r-- libc/sysdeps/powerpc/fpu/e_sqrt.c | 2
-rw-r--r-- libc/sysdeps/powerpc/fpu/e_sqrtf.c | 2
-rw-r--r-- libc/sysdeps/powerpc/fpu/fclrexcpt.c | 4
-rw-r--r-- libc/sysdeps/powerpc/fpu/fedisblxcpt.c | 10
-rw-r--r-- libc/sysdeps/powerpc/fpu/feenablxcpt.c | 10
-rw-r--r-- libc/sysdeps/powerpc/fpu/fegetexcept.c | 10
-rw-r--r-- libc/sysdeps/powerpc/fpu/feholdexcpt.c | 5
-rw-r--r-- libc/sysdeps/powerpc/fpu/fenv_libc.h | 2
-rw-r--r-- libc/sysdeps/powerpc/fpu/fesetenv.c | 4
-rw-r--r-- libc/sysdeps/powerpc/fpu/feupdateenv.c | 6
-rw-r--r-- libc/sysdeps/powerpc/fpu/fgetexcptflg.c | 2
-rw-r--r-- libc/sysdeps/powerpc/fpu/fraiseexcpt.c | 12
-rw-r--r-- libc/sysdeps/powerpc/fpu/fsetexcptflg.c | 8
-rw-r--r-- libc/sysdeps/powerpc/fpu/ftestexcept.c | 2
-rw-r--r-- libc/sysdeps/powerpc/fpu/libm-test-ulps | 68
-rw-r--r-- libc/sysdeps/powerpc/fpu/s_float_bitwise.h | 54
-rw-r--r-- libc/sysdeps/powerpc/fpu/s_llround.c | 33
-rw-r--r-- libc/sysdeps/powerpc/fpu/s_llroundf.c | 33
-rw-r--r-- libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c | 87
-rw-r--r-- libc/sysdeps/powerpc/fpu_control.h | 69
-rw-r--r-- libc/sysdeps/powerpc/jmpbuf-offsets.h | 6
-rw-r--r-- libc/sysdeps/powerpc/longjmp.c | 6
-rw-r--r-- libc/sysdeps/powerpc/nofpu/Makefile | 30
-rw-r--r-- libc/sysdeps/powerpc/nofpu/Subdirs | 1
-rw-r--r-- libc/sysdeps/powerpc/nofpu/Versions | 20
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fclrexcpt.c | 37
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fedisblxcpt.c | 32
-rw-r--r-- libc/sysdeps/powerpc/nofpu/feenablxcpt.c | 32
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fegetenv.c | 48
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fegetexcept.c | 27
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fegetround.c | 28
-rw-r--r-- libc/sysdeps/powerpc/nofpu/feholdexcpt.c | 43
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fenv_const.c | 34
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fenv_libc.h | 28
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fesetenv.c | 42
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fesetround.c | 33
-rw-r--r-- libc/sysdeps/powerpc/nofpu/feupdateenv.c | 51
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fgetexcptflg.c | 37
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fraiseexcpt.c | 41
-rw-r--r-- libc/sysdeps/powerpc/nofpu/fsetexcptflg.c | 38
-rw-r--r-- libc/sysdeps/powerpc/nofpu/ftestexcept.c | 28
-rw-r--r-- libc/sysdeps/powerpc/nofpu/get-rounding-mode.h | 35
-rw-r--r-- libc/sysdeps/powerpc/nofpu/libm-test-ulps | 7297
-rw-r--r-- libc/sysdeps/powerpc/nofpu/shlib-versions | 1
-rw-r--r-- libc/sysdeps/powerpc/nofpu/sim-full.c | 46
-rw-r--r-- libc/sysdeps/powerpc/nofpu/soft-supp.h | 48
-rw-r--r-- libc/sysdeps/powerpc/novmx-longjmp.c | 12
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/memcmp.S | 128
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/memcpy.S | 130
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/memset.S | 152
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/strcmp.S | 134
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/strcpy.S | 107
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/strlen.S | 75
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/405/strncmp.S | 128
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/440/Implies | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/464/Implies | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/476/Implies | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/476/memset.S | 152
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/Makefile | 10
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/__longjmp-common.S | 42
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/dl-machine.c | 14
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile | 9
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c | 53
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c | 39
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c | 54
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c | 54
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c | 47
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c | 36
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c | 29
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c | 57
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c | 41
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h | 96
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c | 49
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c | 35
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c | 47
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c | 42
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c | 41
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c | 42
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c | 41
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c | 41
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c | 28
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c | 40
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c | 55
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c | 31
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S | 27
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c | 53
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 8
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S | 3
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 73
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/mcount.c | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 20
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h | 21
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/memcmp.S | 1064
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/memcpy.S | 58
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/memset.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power4/strncmp.S | 56
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S | 2
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/memcpy.S | 81
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power6/memset.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S | 5
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S | 7
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c | 8
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/memchr.S | 185
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/memcmp.S | 1626
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/memcpy.S | 24
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S | 28
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/memrchr.S | 187
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/memset.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S | 17
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/strchr.S | 51
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S | 27
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/strlen.S | 17
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/strncmp.S | 55
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/power7/strnlen.S | 106
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/setjmp-common.S | 41
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/setjmp.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/stackguard-macros.h | 10
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/stpcpy.S | 18
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/strchr.S | 71
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/strcmp.S | 42
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/strcpy.S | 18
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/strlen.S | 69
-rw-r--r-- libc/sysdeps/powerpc/powerpc32/strncmp.S | 56
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/__longjmp-common.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/dl-machine.h | 28
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S | 6
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S | 4
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/memcpy.S | 27
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/memset.S | 10
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power4/memcmp.S | 1041
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power4/memcpy.S | 61
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power4/memset.S | 30
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power4/strncmp.S | 63
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power6/memcpy.S | 329
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power6/memset.S | 10
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S | 6
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S | 5
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/memchr.S | 188
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/memcmp.S | 1613
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/memcpy.S | 704
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S | 26
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/memrchr.S | 192
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/memset.S | 6
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S | 17
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/strchr.S | 43
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S | 19
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/strlen.S | 17
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/strncmp.S | 61
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/power7/strnlen.S | 111
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/setjmp-common.S | 72
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/setjmp.S | 12
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/stackguard-macros.h | 10
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/stpcpy.S | 18
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/strchr.S | 75
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/strcmp.S | 65
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/strcpy.S | 27
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/strlen.S | 75
-rw-r--r-- libc/sysdeps/powerpc/powerpc64/strncmp.S | 63
-rw-r--r-- libc/sysdeps/powerpc/preconfigure | 11
-rw-r--r-- libc/sysdeps/powerpc/soft-fp/sfp-machine.h | 115
-rw-r--r-- libc/sysdeps/powerpc/sysdep.h | 15
186 files changed, 16561 insertions(+), 3539 deletions(-)
diff --git a/libc/sysdeps/powerpc/bits/fenv.h b/libc/sysdeps/powerpc/bits/fenv.h
index 07cd3c8e5..122edd3dc 100644
--- a/libc/sysdeps/powerpc/bits/fenv.h
+++ b/libc/sysdeps/powerpc/bits/fenv.h
@@ -19,75 +19,6 @@
# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
#endif
-#if defined __NO_FPRS__ && !defined _SOFT_FLOAT /* E500 */
-
-/* Define bits representing the exception. We use the bit positions of
- the appropriate bits in the SPEFSCR... */
-enum
- {
- FE_INEXACT =
-#define FE_INEXACT (1 << (63 - 42))
- FE_INEXACT,
- FE_INVALID =
-#define FE_INVALID (1 << (63 - 43))
- FE_INVALID,
- FE_DIVBYZERO =
-#define FE_DIVBYZERO (1 << (63 - 44))
- FE_DIVBYZERO,
- FE_UNDERFLOW =
-#define FE_UNDERFLOW (1 << (63 - 45))
- FE_UNDERFLOW,
- FE_OVERFLOW =
-#define FE_OVERFLOW (1 << (63 - 46))
- FE_OVERFLOW
- };
-
-#define FE_ALL_EXCEPT \
- (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID)
-
-/* The E500 support all of the four defined rounding modes. We use
- the bit pattern in the SPEFSCR as the values for the appropriate
- macros. */
-enum
- {
- FE_TONEAREST =
-#define FE_TONEAREST 0
- FE_TONEAREST,
- FE_TOWARDZERO =
-#define FE_TOWARDZERO 1
- FE_TOWARDZERO,
- FE_UPWARD =
-#define FE_UPWARD 2
- FE_UPWARD,
- FE_DOWNWARD =
-#define FE_DOWNWARD 3
- FE_DOWNWARD
- };
-
-/* Type representing exception flags. */
-typedef unsigned int fexcept_t;
-
-typedef double fenv_t;
-
-/* If the default argument is used we use this value. */
-extern const fenv_t __fe_dfl_env;
-#define FE_DFL_ENV (&__fe_dfl_env)
-
-#ifdef __USE_GNU
-/* Floating-point environment where all exceptions are enabled. Note that
- this is not sufficient to give you SIGFPE. */
-extern const fenv_t __fe_enabled_env;
-# define FE_ENABLED_ENV (&__fe_enabled_env)
-
-/* Floating-point environment with all exceptions enabled. Note that
- just evaluating this value will set the processor into 'FPU
- exceptions imprecise recoverable' mode, which may cause a significant
- performance penalty (but have no other visible effect). */
-extern const fenv_t *__fe_nomask_env (void);
-# define FE_NOMASK_ENV (__fe_nomask_env ())
-#endif
-
-#else /* PowerPC 6xx floating-point. */
/* Define bits representing the exception. We use the bit positions of
the appropriate bits in the FPSCR... */
@@ -244,5 +175,3 @@ extern const fenv_t *__fe_mask_env (void);
__END_DECLS
#endif
-
-#endif
diff --git a/libc/sysdeps/powerpc/bits/mathinline.h b/libc/sysdeps/powerpc/bits/mathinline.h
index 140fff08e..cef5b29b1 100644
--- a/libc/sysdeps/powerpc/bits/mathinline.h
+++ b/libc/sysdeps/powerpc/bits/mathinline.h
@@ -61,21 +61,28 @@
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbitf (__x);
+#else
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
+#endif
}
__MATH_INLINE int
__NTH (__signbit (double __x))
{
- __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbit (__x);
+#else
+ __extension__ union { double __d; long long __i; } __u = { __d: __x };
+ return __u.__i < 0;
+#endif
}
# ifdef __LONG_DOUBLE_128__
__MATH_INLINE int
__NTH (__signbitl (long double __x))
{
- __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+ return __signbit ((double) __x);
}
# endif
# endif
@@ -92,22 +99,17 @@ __NTH (lrint (double __x))
{
union {
double __d;
- int __ll[2];
+ long long __ll;
} __u;
__asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return __u.__ll;
}
__MATH_INLINE long int lrintf (float __x) __THROW;
__MATH_INLINE long int
__NTH (lrintf (float __x))
{
- union {
- double __d;
- int __ll[2];
- } __u;
- __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return lrint ((double) __x);
}
# endif
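
A note on the __signbit rewrite above: reading the double's representation as a single 64-bit integer makes the test width- and endian-neutral, where the old code inspected __i[0] and relied on the sign word coming first. A minimal stand-alone sketch of the new form (names here are illustrative, not glibc's):

#include <stdio.h>

/* Sketch: the sign of an IEEE 754 double is the most significant bit
   of its 64-bit representation, so a signed integer compare suffices. */
static int
signbit_sketch (double x)
{
  __extension__ union { double d; long long i; } u = { .d = x };
  return u.i < 0;
}

int
main (void)
{
  /* Prints "1 0": -0.0 carries the sign bit even though -0.0 == 0.0. */
  printf ("%d %d\n", signbit_sketch (-0.0), signbit_sketch (1.0));
  return 0;
}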
diff --git a/libc/sysdeps/powerpc/ffs.c b/libc/sysdeps/powerpc/ffs.c
index e0fee46b3..deba0cdd0 100644
--- a/libc/sysdeps/powerpc/ffs.c
+++ b/libc/sysdeps/powerpc/ffs.c
@@ -35,6 +35,7 @@ __ffs (int x)
return 32 - cnt;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#if ULONG_MAX == UINT_MAX
#undef ffsl
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrt.c b/libc/sysdeps/powerpc/fpu/e_sqrt.c
index 3efe277f3..2d50fb525 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrt.c
@@ -145,7 +145,7 @@ __slow_ieee754_sqrt (double x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrtf.c b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
index 6e50a3cd7..91d2d37d7 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -121,7 +121,7 @@ __slow_ieee754_sqrtf (float x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/fclrexcpt.c b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
index 86575dba6..7f66e21ce 100644
--- a/libc/sysdeps/powerpc/fpu/fclrexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
@@ -28,8 +28,8 @@ __feclearexcept (int excepts)
u.fenv = fegetenv_register ();
/* Clear the relevant bits. */
- u.l[1] = u.l[1] & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
- | (excepts & FPSCR_STICKY_BITS));
+ u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
+ | (excepts & FPSCR_STICKY_BITS));
/* Put the new state in effect. */
fesetenv_register (u.fenv);
diff --git a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
index 659566b67..f2c45a60c 100644
--- a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
@@ -32,15 +32,15 @@ fedisableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] &= ~(1 << (31 - FPSCR_XE));
+ fe.l &= ~(1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] &= ~(1 << (31 - FPSCR_ZE));
+ fe.l &= ~(1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_UE));
+ fe.l &= ~(1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_OE));
+ fe.l &= ~(1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] &= ~(1 << (31 - FPSCR_VE));
+ fe.l &= ~(1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/feenablxcpt.c b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
index fc4bfffad..472796d15 100644
--- a/libc/sysdeps/powerpc/fpu/feenablxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
@@ -32,15 +32,15 @@ feenableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] |= (1 << (31 - FPSCR_XE));
+ fe.l |= (1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] |= (1 << (31 - FPSCR_ZE));
+ fe.l |= (1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_UE));
+ fe.l |= (1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_OE));
+ fe.l |= (1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] |= (1 << (31 - FPSCR_VE));
+ fe.l |= (1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/fegetexcept.c b/libc/sysdeps/powerpc/fpu/fegetexcept.c
index f3d5724e9..23d47a27e 100644
--- a/libc/sysdeps/powerpc/fpu/fegetexcept.c
+++ b/libc/sysdeps/powerpc/fpu/fegetexcept.c
@@ -27,15 +27,15 @@ __fegetexcept (void)
fe.fenv = fegetenv_register ();
- if (fe.l[1] & (1 << (31 - FPSCR_XE)))
+ if (fe.l & (1 << (31 - FPSCR_XE)))
result |= FE_INEXACT;
- if (fe.l[1] & (1 << (31 - FPSCR_ZE)))
+ if (fe.l & (1 << (31 - FPSCR_ZE)))
result |= FE_DIVBYZERO;
- if (fe.l[1] & (1 << (31 - FPSCR_UE)))
+ if (fe.l & (1 << (31 - FPSCR_UE)))
result |= FE_UNDERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_OE)))
+ if (fe.l & (1 << (31 - FPSCR_OE)))
result |= FE_OVERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_VE)))
+ if (fe.l & (1 << (31 - FPSCR_VE)))
result |= FE_INVALID;
return result;
diff --git a/libc/sysdeps/powerpc/fpu/feholdexcpt.c b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
index 013d2bfbb..0ecf0f7bc 100644
--- a/libc/sysdeps/powerpc/fpu/feholdexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
@@ -30,13 +30,12 @@ feholdexcept (fenv_t *envp)
/* Clear everything except for the rounding modes and non-IEEE arithmetic
flag. */
- new.l[1] = old.l[1] & 7;
- new.l[0] = old.l[0];
+ new.l = old.l & 0xffffffff00000007LL;
/* If the old env had any enabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) != 0)
(void)__fe_mask_env ();
/* Put the new state in effect. */
diff --git a/libc/sysdeps/powerpc/fpu/fenv_libc.h b/libc/sysdeps/powerpc/fpu/fenv_libc.h
index 191095156..baa2a7d39 100644
--- a/libc/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/libc/sysdeps/powerpc/fpu/fenv_libc.h
@@ -69,7 +69,7 @@ libm_hidden_proto (__fe_nomask_env)
typedef union
{
fenv_t fenv;
- unsigned int l[2];
+ unsigned long long l;
} fenv_union_t;
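
This union is the pivot of most hunks in this commit: fenv_t is a double whose bit pattern holds the FPSCR, and viewing it as one unsigned long long (rather than two unsigned int halves) lets every bit test drop the l[1] indexing. A hedged sketch of the access pattern; fegetenv_register is glibc-internal, so the captured environment is taken as a parameter here:

/* Sketch only: a stand-in for glibc's fenv_t; the FPSCR bits occupy
   the low 32 bits of the 64-bit view on PowerPC. */
typedef double fenv_t_sketch;

typedef union
{
  fenv_t_sketch fenv;
  unsigned long long l;
} fenv_union_sketch;

/* Test FPSCR bits (e.g. mask = FE_INVALID) in a captured environment. */
static int
fenv_test_bits (fenv_t_sketch env, unsigned long long mask)
{
  fenv_union_sketch u = { .fenv = env };
  return (u.l & mask) != 0;
}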
diff --git a/libc/sysdeps/powerpc/fpu/fesetenv.c b/libc/sysdeps/powerpc/fpu/fesetenv.c
index e92adb4c5..6c00b267a 100644
--- a/libc/sysdeps/powerpc/fpu/fesetenv.c
+++ b/libc/sysdeps/powerpc/fpu/fesetenv.c
@@ -34,14 +34,14 @@ __fesetenv (const fenv_t *envp)
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
hardware into "precise mode" and may cause the FPU to run slower on some
hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
fesetenv_register (*envp);
diff --git a/libc/sysdeps/powerpc/fpu/feupdateenv.c b/libc/sysdeps/powerpc/fpu/feupdateenv.c
index 6500ea173..677504416 100644
--- a/libc/sysdeps/powerpc/fpu/feupdateenv.c
+++ b/libc/sysdeps/powerpc/fpu/feupdateenv.c
@@ -34,20 +34,20 @@ __feupdateenv (const fenv_t *envp)
/* Restore rounding mode and exception enable from *envp and merge
exceptions. Leave fraction rounded/inexact and FP result/CC bits
unchanged. */
- new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF);
+ new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff);
/* If the old env has no enabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put
the hardware into "precise mode" and may cause the FPU to run slower on
some hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
/* Atomically enable and raise (if appropriate) exceptions set in `new'. */
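
The masks in the merged line above carry the whole meaning of feupdateenv: status flags raised since the environment was saved are kept from the live FPSCR, while the rounding mode and enable bits come from the saved environment. A sketch of just that merge, with the constants copied from the diff (FPSCR bit layout assumed):

/* Sketch of the merge: 'current' is the live FPSCR image, 'saved' the
   environment being restored; the masks are those from the diff. */
static unsigned long long
feupdateenv_merge (unsigned long long current, unsigned long long saved)
{
  /* Keep exception flags raised since the save; take rounding mode
     and exception-enable bits from the saved environment. */
  return (current & 0xffffffff1fffff00ULL) | (saved & 0x1ff80fffULL);
}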
diff --git a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
index f6327ce17..1395bede0 100644
--- a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
@@ -27,7 +27,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
u.fenv = fegetenv_register ();
/* Return (all of) it. */
- *flagp = u.l[1] & excepts & FE_ALL_EXCEPT;
+ *flagp = u.l & excepts & FE_ALL_EXCEPT;
/* Success. */
return 0;
diff --git a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
index 9118c1954..6193071bd 100644
--- a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
@@ -33,11 +33,11 @@ __feraiseexcept (int excepts)
u.fenv = fegetenv_register ();
/* Add the exceptions */
- u.l[1] = (u.l[1]
- | (excepts & FPSCR_STICKY_BITS)
- /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
- | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = (u.l
+ | (excepts & FPSCR_STICKY_BITS)
+ /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
+ | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment),
triggering any appropriate exceptions. */
@@ -49,7 +49,7 @@ __feraiseexcept (int excepts)
don't have FE_INVALID_SOFTWARE implemented. Detect this
case and raise FE_INVALID_SNAN instead. */
u.fenv = fegetenv_register ();
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
set_fpscr_bit (FPSCR_VXSNAN);
}
diff --git a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
index c050d4022..0d309c8d5 100644
--- a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
@@ -31,10 +31,10 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
flag = *flagp & excepts;
/* Replace the exception status */
- u.l[1] = ((u.l[1] & ~(FPSCR_STICKY_BITS & excepts))
- | (flag & FPSCR_STICKY_BITS)
- | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
+ | (flag & FPSCR_STICKY_BITS)
+ | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment).
This may cause floating-point exceptions if the restored state
diff --git a/libc/sysdeps/powerpc/fpu/ftestexcept.c b/libc/sysdeps/powerpc/fpu/ftestexcept.c
index 0dbc3befb..86eea0fb0 100644
--- a/libc/sysdeps/powerpc/fpu/ftestexcept.c
+++ b/libc/sysdeps/powerpc/fpu/ftestexcept.c
@@ -28,6 +28,6 @@ fetestexcept (int excepts)
/* The FE_INVALID bit is dealt with correctly by the hardware, so we can
just: */
- return u.l[1] & excepts;
+ return u.l & excepts;
}
libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/fpu/libm-test-ulps b/libc/sysdeps/powerpc/fpu/libm-test-ulps
index 6fdace9ee..37b2ca192 100644
--- a/libc/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/libc/sysdeps/powerpc/fpu/libm-test-ulps
@@ -5927,11 +5927,31 @@ double: 1
idouble: 1
# gamma
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "gamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -6131,9 +6151,9 @@ ildouble: 1
ldouble: 1
Test "jn (10, 10.0)":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
Test "jn (10, 2.0)":
@@ -6146,6 +6166,14 @@ double: 2
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -6226,11 +6254,31 @@ ildouble: 7
ldouble: 7
# lgamma
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -6592,6 +6640,9 @@ float: 1
ifloat: 1
ildouble: 2
ldouble: 2
+Test "tan_towardzero (2)":
+ildouble: 1
+ldouble: 1
Test "tan_towardzero (3)":
float: 1
ifloat: 1
@@ -7334,6 +7385,19 @@ idouble: 3
ifloat: 1
ildouble: 1
ldouble: 1
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
Test "yn (3, 0.125)":
double: 1
idouble: 1
diff --git a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
index 8e4adca86..c0a4e56be 100644
--- a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
+++ b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
@@ -23,18 +23,19 @@
#include <math_private.h>
/* Returns (int)(num & 0x7FFFFFF0 == value) */
-static inline
-int __float_and_test28 (float num, float value)
+static inline int
+__float_and_test28 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ffffffe, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7ffffff0 };
__asm__ (
- /* the 'f' constrain is use on mask because we just need
+ /* the 'f' constraint is used on mask because we just need
* to compare floats, not full vector */
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -46,16 +47,17 @@ int __float_and_test28 (float num, float value)
}
/* Returns (int)(num & 0x7FFFFF00 == value) */
-static inline
-int __float_and_test24 (float num, float value)
+static inline int
+__float_and_test24 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7fffffe0, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7fffff00 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -67,16 +69,17 @@ int __float_and_test24 (float num, float value)
}
/* Returns (float)(num & 0x7F800000) */
-static inline
-float __float_and8 (float num)
+static inline float
+__float_and8 (float num)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -88,17 +91,18 @@ float __float_and8 (float num)
}
/* Returns ((int32_t)(num & 0x7F800000) >> 23) */
-static inline
-int32_t __float_get_exp (float num)
+static inline int32_t
+__float_get_exp (float num)
{
int32_t inum;
#ifdef _ARCH_PWR7
float ret;
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
GET_FLOAT_WORD(inum, ret);
#else
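
The generic #else paths of these helpers (truncated in the hunk above) do the same masking through the float's bit pattern. A self-contained sketch of __float_and8, following its stated contract "(float)(num & 0x7F800000)", with a plain union standing in for glibc's GET_FLOAT_WORD/SET_FLOAT_WORD:

#include <stdio.h>
#include <stdint.h>

/* Keep only the exponent field of a single-precision float. */
static float
float_and8_sketch (float num)
{
  union { float f; int32_t i; } u = { .f = num };
  u.i &= 0x7f800000;
  return u.f;
}

int
main (void)
{
  /* Prints "2": only the exponent bits of 3.5f survive. */
  printf ("%g\n", float_and8_sketch (3.5f));
  return 0;
}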
diff --git a/libc/sysdeps/powerpc/fpu/s_llround.c b/libc/sysdeps/powerpc/fpu/s_llround.c
index 9a0182653..995d0a724 100644
--- a/libc/sysdeps/powerpc/fpu/s_llround.c
+++ b/libc/sysdeps/powerpc/fpu/s_llround.c
@@ -19,29 +19,28 @@
#include <math.h>
#include <math_ldbl_opt.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llround (double x)
{
- double xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (double) xr;
+ long long xr = (long long) x;
+ double xrf = (double) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llround, llround)
#ifdef NO_LONG_DOUBLE
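
The rewritten branches return the long long result directly instead of a converted double, and they guard the rounding increment against overflow: the +1 is computed in unsigned arithmetic, where wraparound is well defined, and the comparison result (0 or 1) is added only while the incremented value stays positive, so a result clipped at LLONG_MAX stays clipped. A runnable sketch of the same logic:

#include <stdio.h>

static long long
llround_sketch (double x)
{
  long long xr = (long long) x;   /* truncates toward zero */
  double xrf = (double) xr;

  if (x >= 0.0)
    {
      /* Round half away from zero; the unsigned +1 avoids signed
         overflow, and the > 0 test keeps LLONG_MAX results pinned. */
      if (x - xrf >= 0.5)
        xr += (long long) ((unsigned long long) xr + 1) > 0;
    }
  else
    {
      if (xrf - x >= 0.5)
        xr -= (long long) ((unsigned long long) xr - 1) < 0;
    }
  return xr;
}

int
main (void)
{
  /* Prints "3 -3". */
  printf ("%lld %lld\n", llround_sketch (2.5), llround_sketch (-2.5));
  return 0;
}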
diff --git a/libc/sysdeps/powerpc/fpu/s_llroundf.c b/libc/sysdeps/powerpc/fpu/s_llroundf.c
index 07d12adbf..0935de662 100644
--- a/libc/sysdeps/powerpc/fpu/s_llroundf.c
+++ b/libc/sysdeps/powerpc/fpu/s_llroundf.c
@@ -18,28 +18,27 @@
#include <math.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llroundf (float x)
{
- float xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (float) xr;
+ long long xr = (long long) x;
+ float xrf = (float) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llroundf, llroundf)
diff --git a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
index feffa6b4f..cc9b320bf 100644
--- a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
+++ b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
@@ -83,7 +83,7 @@ ElfW(Addr) query_auxv(int type)
return 0;
}
-typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__)));
+typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__)));
typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL
@@ -95,50 +95,51 @@ typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_TEST1_RN 0x0000000000000002ULL
/* Macros for accessing the hardware control word on Power6[x]. */
-# define _GET_DI_FPSCR(__fpscr) ({ \
- union { double d; \
- di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.fpscr; \
- tmp.fpscr; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+#define _GET_DI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = u.fpscr; \
+ u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_DI_FPSCR(__fpscr) { \
- union { double d; di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.fpscr = __fpscr; \
- /* Set the entire 64-bit FPSCR. */ \
- __asm__ ("lfd%U0 0,%0; " \
- ".machine push; " \
- ".machine \"power6\"; " \
- "mtfsf 255,0,1,0; " \
- ".machine pop" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
-
-# define _GET_SI_FPSCR(__fpscr) ({ \
- union { double d; \
- si_fpscr_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.cw[1]; \
- tmp.cw[0]; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+# define _SET_DI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ u.fpscr = __fpscr; \
+ fr = u.d; \
+ /* Set the entire 64-bit FPSCR. */ \
+ __asm__ (".machine push; " \
+ ".machine \"power6\"; " \
+ "mtfsf 255,%0,1,0; " \
+ ".machine pop" : : "f" (fr)); \
+ fr = 0.0; \
+ }
+
+# define _GET_SI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = (si_fpscr_t) u.fpscr; \
+ (si_fpscr_t) u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_SI_FPSCR(__fpscr) { \
- union { double d; si_fpscr_t fpscr[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.fpscr[0] = 0xFFF80000; \
- tmp.fpscr[1] = __fpscr; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _SET_SI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ /* More-or-less arbitrary; this is a QNaN. */ \
+ u.fpscr = 0xfff80000ULL << 32; \
+ u.fpscr |= __fpscr & 0xffffffffULL; \
+ fr = u.d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (fr)); \
+ fr = 0.0; \
+ }
void prime_special_regs(int which)
{
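
The rewritten macros move the FPSCR image between the FPSCR and a floating-point register variable directly, instead of bouncing through an aligned memory temporary with stfd/lfd, and they no longer clobber fr0 by name. A minimal sketch of the read side (PowerPC only; mffs copies the FPSCR into a floating-point register):

/* PowerPC-only sketch of the new _GET_DI_FPSCR style; a union
   recovers the bits from the register value. */
static unsigned long long
get_fpscr_sketch (void)
{
  union { double d; unsigned long long fpscr; } u;
  double fr;
  __asm__ ("mffs %0" : "=f" (fr));
  u.d = fr;
  return u.fpscr;
}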
diff --git a/libc/sysdeps/powerpc/fpu_control.h b/libc/sysdeps/powerpc/fpu_control.h
index d03b8eb80..e82e7913c 100644
--- a/libc/sysdeps/powerpc/fpu_control.h
+++ b/libc/sysdeps/powerpc/fpu_control.h
@@ -28,7 +28,7 @@ typedef unsigned int fpu_control_t;
# define _FPU_SETCW(cw) (void) (cw)
extern fpu_control_t __fpu_control;
-#elif defined __NO_FPRS__ /* E500 */
+#elif defined __NO_FPRS__ /* e500 */
/* rounding control */
# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */
@@ -37,33 +37,28 @@ extern fpu_control_t __fpu_control;
# define _FPU_RC_ZERO 0x01
/* masking of interrupts */
-#define _FPU_MASK_ZM 0x10 /* zero divide */
-#define _FPU_MASK_OM 0x40 /* overflow */
-#define _FPU_MASK_UM 0x80 /* underflow */
-#define _FPU_MASK_XM 0x40 /* inexact */
-#define _FPU_MASK_IM 0x20 /* invalid operation */
+# define _FPU_MASK_ZM 0x10 /* zero divide */
+# define _FPU_MASK_OM 0x04 /* overflow */
+# define _FPU_MASK_UM 0x08 /* underflow */
+# define _FPU_MASK_XM 0x40 /* inexact */
+# define _FPU_MASK_IM 0x20 /* invalid operation */
-#define _FPU_RESERVED 0xff3fff7f /* These bits are reserved are not changed. */
+# define _FPU_RESERVED 0x00c10080 /* These bits are reserved and not changed. */
-/* The fdlibm code requires no interrupts for exceptions. */
-#define _FPU_DEFAULT 0x00000000 /* Default value. */
-
-/* IEEE: same as above, but (some) exceptions;
- we leave the 'inexact' exception off.
- */
-#define _FPU_IEEE 0x000003c0
+/* Correct IEEE semantics require traps to be enabled at the hardware
+ level; the kernel then does the emulation and determines whether
+ generation of signals from those traps was enabled using prctl. */
+# define _FPU_DEFAULT 0x0000003c /* Default value. */
+# define _FPU_IEEE _FPU_DEFAULT
/* Type of the control word. */
typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-#define _FPU_GETCW(__cw) ({ \
- unsigned int env; \
- asm volatile ("mfspefscr %0" : "=r" (env)); \
- (__cw) = env; })
-#define _FPU_SETCW(__cw) ({ \
- unsigned int env = __cw; \
- asm volatile ("mtspefscr %0" : : "r" (env)); })
+# define _FPU_GETCW(cw) \
+ __asm__ volatile ("mfspefscr %0" : "=r" (cw))
+# define _FPU_SETCW(cw) \
+ __asm__ volatile ("mtspefscr %0" : : "r" (cw))
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
@@ -96,22 +91,26 @@ extern fpu_control_t __fpu_control;
# define _FPU_IEEE 0x000000f0
/* Type of the control word. */
-typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__)));
+typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-# define _FPU_GETCW(__cw) ( { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__cw)=tmp.cw[1]; \
- tmp.cw[1]; } )
-# define _FPU_SETCW(__cw) { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.cw[1] = __cw; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _FPU_GETCW(cw) \
+ ({union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __asm__ ("mffs %0" : "=f" (__fr)); \
+ __u.__d = __fr; \
+ (cw) = (fpu_control_t) __u.__ll; \
+ (fpu_control_t) __u.__ll; \
+ })
+
+# define _FPU_SETCW(cw) \
+ { union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \
+ __u.__ll |= (cw) & 0xffffffffLL; \
+ __fr = __u.__d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \
+ }
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
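
_FPU_SETCW keeps the long-standing trick visible above: the 32-bit control word travels through a floating-point register in the low word of a double whose high word is the QNaN pattern 0xfff80000, so the load cannot trap or alter the payload. A host-independent sketch of just the packing step:

#include <stdio.h>

/* Mirrors the packing in the _FPU_SETCW hunk above. */
static double
pack_control_word (unsigned int cw)
{
  union { double d; unsigned long long ll; } u;
  u.ll = 0xfff80000ULL << 32;   /* QNaN pattern in the high word */
  u.ll |= cw & 0xffffffffULL;   /* control word in the low word */
  return u.d;
}

int
main (void)
{
  union { double d; unsigned long long ll; } u;
  u.d = pack_control_word (0xf0);
  printf ("%016llx\n", u.ll);   /* fff80000000000f0 */
  return 0;
}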
diff --git a/libc/sysdeps/powerpc/jmpbuf-offsets.h b/libc/sysdeps/powerpc/jmpbuf-offsets.h
index 64c658a58..f2116bd70 100644
--- a/libc/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/libc/sysdeps/powerpc/jmpbuf-offsets.h
@@ -21,12 +21,10 @@
#define JB_LR 2 /* The address we will return to */
#if __WORDSIZE == 64
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
-# define JB_CR 21 /* Condition code registers with the VRSAVE at */
- /* offset 172 (low half of the double word. */
+# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
- /* 168 (high half of the double word). */
+# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
#else
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
diff --git a/libc/sysdeps/powerpc/longjmp.c b/libc/sysdeps/powerpc/longjmp.c
index 198c89420..189fc03ab 100644
--- a/libc/sysdeps/powerpc/longjmp.c
+++ b/libc/sysdeps/powerpc/longjmp.c
@@ -55,6 +55,6 @@ weak_alias (__vmx__libc_siglongjmp, __vmxsiglongjmp)
default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE);
default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE);
-default_symbol_version (__vmx_longjmp, _longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxlongjmp, longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxsiglongjmp, siglongjmp, GLIBC_2.3.4);
+versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4);
diff --git a/libc/sysdeps/powerpc/nofpu/Makefile b/libc/sysdeps/powerpc/nofpu/Makefile
new file mode 100644
index 000000000..b9cbf8023
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Makefile
@@ -0,0 +1,30 @@
+# Makefile fragment for PowerPC with no FPU.
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += $(gcc-single-routines) $(gcc-double-routines) \
+ sim-full
+endif
+
+ifeq ($(subdir),math)
+libm-support += fenv_const
+CPPFLAGS += -I../soft-fp/
+# The follow CFLAGS are a work around for GCC Bugzilla Bug 29253
+# "expand_abs wrong default code for floating point"
+# As this is not a regression, a fix is not likely to go into
+# gcc-4.1.1 and may be too late for gcc-4.2. So we need these flags
+# until the fix in a gcc release and glibc drops support for earlier
+# versions of gcc.
+CFLAGS-e_hypotl.c += -fno-builtin-fabsl
+CFLAGS-e_powl.c += -fno-builtin-fabsl
+CFLAGS-s_ccoshl.c += -fno-builtin-fabsl
+CFLAGS-s_csinhl.c += -fno-builtin-fabsl
+CFLAGS-s_clogl.c += -fno-builtin-fabsl
+CFLAGS-s_clog10l.c += -fno-builtin-fabsl
+CFLAGS-s_csinl.c += -fno-builtin-fabsl
+CFLAGS-s_csqrtl.c += -fno-builtin-fabsl
+CFLAGS-w_acosl.c += -fno-builtin-fabsl
+CFLAGS-w_asinl.c += -fno-builtin-fabsl
+CFLAGS-w_atanhl.c += -fno-builtin-fabsl
+CFLAGS-w_j0l.c += -fno-builtin-fabsl
+CFLAGS-w_j1l.c += -fno-builtin-fabsl
+endif
diff --git a/libc/sysdeps/powerpc/nofpu/Subdirs b/libc/sysdeps/powerpc/nofpu/Subdirs
new file mode 100644
index 000000000..87eadf302
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Subdirs
@@ -0,0 +1 @@
+soft-fp
diff --git a/libc/sysdeps/powerpc/nofpu/Versions b/libc/sysdeps/powerpc/nofpu/Versions
new file mode 100644
index 000000000..1a29319d5
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Versions
@@ -0,0 +1,20 @@
+libc {
+ GLIBC_2.3.2 {
+ __sim_exceptions; __sim_disabled_exceptions; __sim_round_mode;
+ __adddf3; __addsf3; __divdf3; __divsf3; __eqdf2; __eqsf2;
+ __extendsfdf2; __fixdfdi; __fixdfsi; __fixsfdi; __fixsfsi;
+ __fixunsdfdi; __fixunsdfsi; __fixunssfdi; __fixunssfsi;
+ __floatdidf; __floatdisf; __floatsidf; __floatsisf;
+ __gedf2; __gesf2; __ledf2; __lesf2; __muldf3; __mulsf3;
+ __negdf2; __negsf2; __sqrtdf2; __sqrtsf2; __subdf3;
+ __subsf3; __truncdfsf2;
+ }
+ GLIBC_2.4 {
+ __floatundidf; __floatundisf;
+ __floatunsidf; __floatunsisf;
+ __unorddf2; __unordsf2;
+ __nedf2; __nesf2;
+ __gtdf2; __gtsf2;
+ __ltdf2; __ltsf2;
+ }
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..fabda0ab9
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
@@ -0,0 +1,37 @@
+/* Clear floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__feclearexcept (int x)
+{
+ __sim_exceptions &= ~x;
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..e06c8f767
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
@@ -0,0 +1,32 @@
+/* Disable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <fenv.h>
+
+int
+fedisableexcept (int x)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions |= x;
+
+ return old_exceptions;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..93249abf6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
@@ -0,0 +1,32 @@
+/* Enable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+extern int __sim_disabled_exceptions;
+
+int
+feenableexcept (int exceptions)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions &= ~exceptions;
+
+ return old_exceptions;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fegetenv.c b/libc/sysdeps/powerpc/nofpu/fegetenv.c
new file mode 100644
index 000000000..51bcef30a
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetenv.c
@@ -0,0 +1,48 @@
+/* Store current floating-point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+extern int __sim_exceptions;
+extern int __sim_disabled_exceptions;
+extern int __sim_round_mode;
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.l[0] = __sim_exceptions;
+ u.l[0] |= __sim_round_mode;
+ u.l[1] = __sim_disabled_exceptions;
+
+ *envp = u.fenv;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fegetenv, fegetenv)
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
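
The soft-float environment packs the simulator state into the two 32-bit words of a double: word 0 carries the raised exceptions together with the rounding mode, word 1 the disabled-exception mask (matching the 0x000000003e000000 default in fenv_const.c further down). A sketch of that round trip, with illustrative stand-ins for the __sim_* globals; the l[0]/l[1] ordering assumes big-endian word order as on PowerPC:

#include <stdio.h>

/* Illustrative stand-ins for glibc's simulator globals. */
static int sim_exceptions;
static int sim_round_mode;
static int sim_disabled_exceptions = 0x3e000000;

typedef union { double fenv; unsigned int l[2]; } env_union_sketch;

static int
fegetenv_sketch (double *envp)
{
  env_union_sketch u;
  u.l[0] = sim_exceptions | sim_round_mode;  /* flags + rounding mode */
  u.l[1] = sim_disabled_exceptions;          /* disabled exceptions */
  *envp = u.fenv;
  return 0;
}

int
main (void)
{
  double env;
  fegetenv_sketch (&env);
  env_union_sketch u = { .fenv = env };
  printf ("%08x %08x\n", u.l[0], u.l[1]);    /* 00000000 3e000000 */
  return 0;
}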
diff --git a/libc/sysdeps/powerpc/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
new file mode 100644
index 000000000..ea39a82b7
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
@@ -0,0 +1,27 @@
+/* Get floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fegetexcept (void)
+{
+ return (__sim_disabled_exceptions ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fegetround.c b/libc/sysdeps/powerpc/nofpu/fegetround.c
new file mode 100644
index 000000000..c232ae379
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetround.c
@@ -0,0 +1,28 @@
+/* Return current rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+#undef fegetround
+int
+fegetround (void)
+{
+ return __sim_round_mode;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..ba6a53acc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
@@ -0,0 +1,43 @@
+/* Store current floating-point environment and clear exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ /* Get the current state. */
+ __fegetenv (envp);
+
+ u.fenv = *envp;
+ /* Clear everything except the rounding mode. */
+ u.l[0] &= 0x3;
+ /* Disable exceptions */
+ u.l[1] = FE_ALL_EXCEPT;
+
+ /* Put the new state in effect. */
+ fesetenv (&u.fenv);
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_const.c b/libc/sysdeps/powerpc/nofpu/fenv_const.c
new file mode 100644
index 000000000..291b1accc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_const.c
@@ -0,0 +1,34 @@
+/* Constants for fenv_bits.h (soft float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* We want to specify the bit pattern of the __fe_*_env constants, so
+ pretend they're really `long long' instead of `double'. */
+
+/* If the default argument is used we use this value. Disable all
+ signalling exceptions as default. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+0x000000003e000000ULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+0xfff80000000000f8ULL;
+
+/* Floating-point environment with the NI bit set. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+0xfff8000000000004ULL;
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
new file mode 100644
index 000000000..14a2d04a2
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
@@ -0,0 +1,28 @@
+/* Internal libc stuff for floating point environment routines.
+ Copyright (C) 2007-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+/* fenv_libc.h is used by the ldbl-128ibm implementations in libm, so
+ this soft-float version must at minimum include fenv.h to provide
+ the fegetround definition.  */
+
+#include <fenv.h>
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/nofpu/fesetenv.c b/libc/sysdeps/powerpc/nofpu/fesetenv.c
new file mode 100644
index 000000000..3f35909b6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetenv.c
@@ -0,0 +1,42 @@
+/* Set floating-point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.fenv = *envp;
+ __sim_exceptions = u.l[0] & FE_ALL_EXCEPT;
+ __sim_round_mode = u.l[0] & 0x3;
+ __sim_disabled_exceptions = u.l[1];
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
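
[The strong_alias/compat_symbol/versioned_symbol boilerplate here, repeated in the files that follow, keeps the old GLIBC_2.1 binding alive for existing binaries while new links get the GLIBC_2.2 one. Outside glibc's macros the mechanism can be sketched with GNU .symver directives; the names and version nodes below are hypothetical, and building a shared object from this also needs a linker version script defining VERS_1 and VERS_2:]

/* Default version: new links bind func to this (note the double '@').  */
int new_func (int x) { return x + 1; }
__asm__ (".symver new_func, func@@VERS_2");

/* Frozen compatibility version, kept only for already-linked binaries.  */
int old_func (int x) { return x; }
__asm__ (".symver old_func, func@VERS_1");
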
diff --git a/libc/sysdeps/powerpc/nofpu/fesetround.c b/libc/sysdeps/powerpc/nofpu/fesetround.c
new file mode 100644
index 000000000..028c1300c
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetround.c
@@ -0,0 +1,33 @@
+/* Set rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fesetround (int round)
+{
+ if ((unsigned int) round > FE_DOWNWARD)
+ return 1;
+
+ __sim_round_mode = round;
+
+ return 0;
+}
+libm_hidden_def (fesetround)
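
[On these targets the four rounding-mode macros are small integers (FE_TONEAREST is 0 and FE_DOWNWARD, the largest, is 3), which is all the range check above relies on; C99 only promises that fesetround returns nonzero on failure. A portable round-trip sketch:]

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  if (fesetround (FE_DOWNWARD) != 0)     /* 0 means success */
    return 1;
  printf ("mode is FE_DOWNWARD: %d\n", fegetround () == FE_DOWNWARD);
  fesetround (FE_TONEAREST);             /* back to the default */
  return 0;
}
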
diff --git a/libc/sysdeps/powerpc/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
new file mode 100644
index 000000000..163f67310
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
@@ -0,0 +1,51 @@
+/* Install given floating-point environment and raise exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int saved_exceptions;
+
+ /* Save currently set exceptions. */
+ saved_exceptions = __sim_exceptions;
+
+ /* Set environment. */
+ fesetenv (envp);
+
+ /* Raise old exceptions. */
+ __sim_exceptions |= saved_exceptions;
+ if (saved_exceptions & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
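
[What distinguishes feupdateenv from plain fesetenv is visible right in the body above: exceptions pending before the call are re-raised after the new environment is installed, delivering SIGFPE here only if some are unmasked. A portable sketch of that merge:]

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  fenv_t env;

  fegetenv (&env);              /* snapshot a clean environment */
  feraiseexcept (FE_INEXACT);   /* make a flag pending */
  feupdateenv (&env);           /* install the snapshot *and* re-raise */
  printf ("FE_INEXACT still set: %d\n", fetestexcept (FE_INEXACT) != 0);
  return 0;
}
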
diff --git a/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..2373fa400
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
@@ -0,0 +1,37 @@
+/* Store current representation for exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ *flagp = (fexcept_t) __sim_exceptions & excepts & FE_ALL_EXCEPT;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..cd142b60b
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
@@ -0,0 +1,41 @@
+/* Raise given exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+#undef feraiseexcept
+int
+__feraiseexcept (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
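
[In this port, raising an exception is simply setting bits in __sim_exceptions, with SIGFPE delivered only for bits not masked by __sim_disabled_exceptions; under the default environment in fenv_const.c above, everything is masked, so flags accumulate silently. A portable demonstration:]

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  feclearexcept (FE_ALL_EXCEPT);
  feraiseexcept (FE_OVERFLOW | FE_INEXACT);
  printf ("overflow: %d, underflow: %d\n",          /* prints 1 and 0 */
          fetestexcept (FE_OVERFLOW) != 0,
          fetestexcept (FE_UNDERFLOW) != 0);
  return 0;
}
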
diff --git a/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..3dc368fdd
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
@@ -0,0 +1,38 @@
+/* Set floating-point environment exception handling (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ /* Ignore exceptions not listed in 'excepts'. */
+ __sim_exceptions = (__sim_exceptions & ~excepts) | (*flagp & excepts);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
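
[Unlike __feraiseexcept above, __fesetexceptflag restores flag bits without raising anything, which is exactly what makes the save/clear/restore idiom safe. In portable C99:]

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  fexcept_t saved;

  feraiseexcept (FE_INEXACT);
  fegetexceptflag (&saved, FE_ALL_EXCEPT);   /* snapshot the flags */
  feclearexcept (FE_ALL_EXCEPT);             /* scratch computation ... */
  fesetexceptflag (&saved, FE_ALL_EXCEPT);   /* put the snapshot back */
  printf ("FE_INEXACT restored: %d\n", fetestexcept (FE_INEXACT) != 0);
  return 0;
}
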
diff --git a/libc/sysdeps/powerpc/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f5d01e881
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
@@ -0,0 +1,28 @@
+/* Test floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fetestexcept (int x)
+{
+ return __sim_exceptions & x;
+}
+libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..20eb81030
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
@@ -0,0 +1,35 @@
+/* Determine floating-point rounding mode within libc. PowerPC
+ soft-float version.
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _POWERPC_NOFPU_GET_ROUNDING_MODE_H
+#define _POWERPC_NOFPU_GET_ROUNDING_MODE_H 1
+
+#include <fenv.h>
+
+#include "soft-supp.h"
+
+/* Return the floating-point rounding mode. */
+
+static inline int
+get_rounding_mode (void)
+{
+ return __sim_round_mode;
+}
+
+#endif /* get-rounding-mode.h */
diff --git a/libc/sysdeps/powerpc/nofpu/libm-test-ulps b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
new file mode 100644
index 000000000..ad5a9cd42
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
@@ -0,0 +1,7297 @@
+# Begin of automatic generation
+
+# acos
+Test "acos (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "acos (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "acos (2e-17)":
+ildouble: 1
+ldouble: 1
+
+# acos_downward
+Test "acos_downward (-0)":
+float: 1
+ifloat: 1
+Test "acos_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_downward (-1)":
+float: 1
+ifloat: 1
+Test "acos_downward (0)":
+float: 1
+ifloat: 1
+Test "acos_downward (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_towardzero
+Test "acos_towardzero (-0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_towardzero (-1)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_upward
+Test "acos_upward (-0)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (-1)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (0)":
+ildouble: 2
+ldouble: 2
+
+# asin
+Test "asin (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0.75)":
+ildouble: 2
+ldouble: 2
+Test "asin (0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+
+# asin_downward
+Test "asin_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (-1.0)":
+ildouble: 1
+ldouble: 1
+Test "asin_downward (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (1.0)":
+float: 1
+ifloat: 1
+
+# asin_towardzero
+Test "asin_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_towardzero (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (1.0)":
+float: 1
+ifloat: 1
+
+# asin_upward
+Test "asin_upward (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_upward (1.0)":
+ildouble: 1
+ldouble: 1
+
+# atan2
+Test "atan2 (-0.00756827042671106339, -.001792735857538728036)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (-inf, -inf)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-max_value, -min_value)":
+float: 1
+ifloat: 1
+Test "atan2 (0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (1.390625, 0.9296875)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "atan2 (inf, -inf)":
+ildouble: 1
+ldouble: 1
+
+# atanh
+Test "atanh (0.75)":
+float: 1
+ifloat: 1
+
+# cabs
+Test "cabs (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+
+# cacos
+Test "Imaginary part of: cacos (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacos (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# cacosh
+Test "Real part of: cacosh (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# carg
+Test "carg (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "carg (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+
+# casin
+Test "Imaginary part of: casin (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# casinh
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+
+# catan
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+
+# catanh
+Test "Real part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-27 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 + 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 + 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 - 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-27 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (0x1p-27 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+
+# cbrt
+Test "cbrt (-27.0)":
+double: 1
+idouble: 1
+Test "cbrt (0.75)":
+double: 1
+idouble: 1
+Test "cbrt (0.9921875)":
+double: 1
+idouble: 1
+
+# ccos
+Test "Imaginary part of: ccos (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: ccos (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# ccosh
+Test "Real part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ccosh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ccosh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cexp
+Test "Imaginary part of: cexp (-2.0 - 3.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (-95 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cexp (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (50 + 0x1p127 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (50 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cexp (500 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cexp (500 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (88.75 + 0.75 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (88.75 + 0.75 i)":
+float: 2
+ifloat: 2
+
+# clog
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog (-0x1.234566p-40 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x11682p-23 + 0x7ffed1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog (0x15cfbd1990d1ffp-53 + 0x176a3973e09a9ap-53 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog (0x1p-147 + 0x1p-147 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x4d9c37e2b5cb4533p-63 + 0x65c98be2385a042ep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x6241ef0da53f539f02fad67dabp-106 + 0x3fb46641182f7efd9caa769dac0p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa1f2c1p-24 + 0xc643aep-24 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa4722f19346cp-51 + 0x7f9631c5e7f07p-51 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xf2p-10 + 0x3e3p-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (1.0 + 0x1.234566p-10 i)":
+float: 1
+ifloat: 1
+
+# clog10
+Test "Imaginary part of: clog10 (-0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 2
+idouble: 2
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 2
+idouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 - 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 - 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 + 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 - 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-1.0 + 0x1.234566p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (-3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + inf i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-inf - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf - 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x0.fffffffffffff8p0 + 0x0.fffffffffffff8p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-100 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-30 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-50 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-60 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1.fffffep+127 + 0x1.fffffep+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x10673dd0f2481p-51 + 0x7ef1d17cefbd2p-51 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: clog10 (0x15d8ab6ed05ca514086ac3a1e84p-105 + 0x1761e480aa094c0b10b34b09ce9p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x164c74eea876p-45 + 0x16f393482f77p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1a6p-10 + 0x3a5p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1df515eb171a808b9e400266p-95 + 0x7c71eb0cd4688dfe98581c77p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-1073 + 0x1p-1073 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-147 + 0x1p-147 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 - 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-509 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-510 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-511 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-61 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-62 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-63 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x298c62cb546588a7p-63 + 0x7911b1dfcc4ecdaep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x2dd46725bp-35 + 0x7783a1284p-35 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x4447d7175p-35 + 0x6c445e00ap-35 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x4d4ep-15 + 0x6605p-15 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x5b06b680ea2ccp-52 + 0xef452b965da9fp-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x81b7efa81fc35ad1p-65 + 0x1ef4b835f1c79d812p-65 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x9b57bp-20 + 0xcb7b4p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xe33f66c9542ca25cc43c867p-95 + 0x7f35a68ebd3704a43c465864p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0xfe961079616p-45 + 0x1bc37e09e6d1p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (1.0 + 0x1.234566p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+
+# cos
+Test "cos (0x1p+120)":
+float: 1
+ifloat: 1
+Test "cos (0x1p+127)":
+float: 1
+ifloat: 1
+Test "cos (M_PI_6l * 2.0)":
+double: 1
+idouble: 1
+Test "cos (M_PI_6l * 4.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_downward
+Test "cos_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_downward (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (2)":
+float: 1
+ifloat: 1
+Test "cos_downward (3)":
+float: 1
+ifloat: 1
+Test "cos_downward (4)":
+float: 1
+ifloat: 1
+Test "cos_downward (5)":
+float: 1
+ifloat: 1
+Test "cos_downward (6)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (7)":
+float: 1
+ifloat: 1
+Test "cos_downward (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "cos_downward (9)":
+ildouble: 1
+ldouble: 1
+
+# cos_tonearest
+Test "cos_tonearest (7)":
+float: 1
+ifloat: 1
+
+# cos_towardzero
+Test "cos_towardzero (1)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (2)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (3)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (5)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (7)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_upward
+Test "cos_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (4)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (5)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (6)":
+float: 1
+ifloat: 1
+Test "cos_upward (7)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (9)":
+float: 2
+ifloat: 2
+
+# cosh_downward
+Test "cosh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_tonearest
+Test "cosh_tonearest (24)":
+ildouble: 1
+ldouble: 1
+
+# cosh_towardzero
+Test "cosh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_upward
+Test "cosh_upward (22)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (23)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (24)":
+ildouble: 2
+ldouble: 2
+
+# cpow
+Test "Real part of: cpow (0.75 + 1.25 i, 0.0 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (0.75 + 1.25 i, 0.75 + 1.25 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 0.0 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 1.0 i)":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "Real part of: cpow (2 + 0 i, 10 + 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (2 + 3 i, 4 + 0 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+Test "Imaginary part of: cpow (2 + 3 i, 4 + 0 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+# csin
+Test "Real part of: csin (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# csinh
+Test "Imaginary part of: csinh (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# csqrt
+Test "Real part of: csqrt (-0x1.000002p-126 - 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Real part of: csqrt (-2 + 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: csqrt (0x1.000002p-126 + 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1p+1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csqrt (0x1p-1074 + 0x1p-1074 i)":
+ildouble: 1
+ldouble: 1
+
+# ctan
+Test "Real part of: ctan (-2 - 3 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0.75 + 1.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctan (0x1p1023 + 1 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0x1p1023 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (0x1p127 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (0x1p127 + 1 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctan (0x3.243f6cp-1 + 0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (1 + 47 i)":
+ildouble: 2
+ldouble: 2
+
+# ctan_downward
+Test "Real part of: ctan_downward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+# ctan_tonearest
+Test "Real part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctan_towardzero
+Test "Real part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 4
+ldouble: 4
+Test "Imaginary part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 13
+ldouble: 13
+Test "Real part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+
+# ctan_upward
+Test "Real part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 10
+ldouble: 10
+Test "Real part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+# ctanh
+Test "Real part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ctanh (0 + 0x3.243f6cp-1 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (0 + pi/4 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: ctanh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (0.75 + 1.25 i)":
+float: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctanh (1 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (1 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctanh (1 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (1 + 0x1p127 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (47 + 1 i)":
+ildouble: 2
+ldouble: 2
+
+# ctanh_downward
+Test "Imaginary part of: ctanh_downward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_tonearest
+Test "Real part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctanh_towardzero
+Test "Real part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 13
+ldouble: 13
+Test "Imaginary part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 4
+ldouble: 4
+Test "Real part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+Test "Imaginary part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_upward
+Test "Real part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 10
+ldouble: 10
+Test "Imaginary part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Real part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+# erf
+Test "erf (1.25)":
+double: 1
+idouble: 1
+
+# erfc
+Test "erfc (0x1.f7303cp+1)":
+double: 1
+idouble: 1
+Test "erfc (0x1.ffa002p+2)":
+float: 1
+ifloat: 1
+Test "erfc (0x1.ffff56789abcdef0123456789a8p+2)":
+ildouble: 1
+ldouble: 1
+Test "erfc (2.0)":
+double: 1
+idouble: 1
+Test "erfc (4.125)":
+double: 1
+idouble: 1
+
+# exp
+Test "exp (0.75)":
+ildouble: 1
+ldouble: 1
+Test "exp (50.0)":
+ildouble: 1
+ldouble: 1
+
+# exp10
+Test "exp10 (-1)":
+double: 1
+idouble: 1
+Test "exp10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "exp10 (-36)":
+double: 1
+idouble: 1
+Test "exp10 (3)":
+double: 1
+idouble: 1
+Test "exp10 (36)":
+double: 1
+idouble: 1
+
+# exp_downward
+Test "exp_downward (2)":
+float: 1
+ifloat: 1
+Test "exp_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_towardzero
+Test "exp_towardzero (2)":
+float: 1
+ifloat: 1
+Test "exp_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_upward
+Test "exp_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# expm1
+Test "expm1 (0.75)":
+double: 1
+idouble: 1
+Test "expm1 (1)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "expm1 (500.0)":
+double: 1
+idouble: 1
+
+# gamma
+Test "gamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# hypot
+Test "hypot (-0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, 0.7)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.75, 1.25)":
+ildouble: 1
+ldouble: 1
+Test "hypot (12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (12.4, 0.7)":
+float: 1
+ifloat: 1
+
+# j0
+Test "j0 (-0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "j0 (-4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (0.75)":
+float: 1
+ifloat: 1
+Test "j0 (0x1.d7ce3ap+107)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j0 (10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "j0 (2.0)":
+float: 2
+ifloat: 2
+Test "j0 (4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (8.0)":
+float: 1
+ifloat: 1
+
+# j1
+Test "j1 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j1 (10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j1 (2.0)":
+double: 1
+idouble: 1
+Test "j1 (8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# jn
+Test "jn (0, -4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 0.75)":
+float: 1
+ifloat: 1
+Test "jn (0, 10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (0, 2.0)":
+float: 2
+ifloat: 2
+Test "jn (0, 4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 8.0)":
+float: 1
+ifloat: 1
+Test "jn (1, 10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "jn (1, 2.0)":
+double: 1
+idouble: 1
+Test "jn (1, 8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, -1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (10, 1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 10.0)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (10, 2.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "jn (2, 0x1.ffff62p+99)":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+Test "jn (2, 2.4048255576957729)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (3, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "jn (3, 2.0)":
+float: 1
+ifloat: 1
+Test "jn (3, 2.4048255576957729)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "jn (4, 2.4048255576957729)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "jn (5, 2.4048255576957729)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (6, 2.4048255576957729)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (7, 2.4048255576957729)":
+double: 3
+float: 5
+idouble: 3
+ifloat: 5
+ildouble: 2
+ldouble: 2
+Test "jn (8, 2.4048255576957729)":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+Test "jn (9, 2.4048255576957729)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+# lgamma
+Test "lgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# log10
+Test "log10 (0.75)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "log10 (e)":
+float: 1
+ifloat: 1
+
+# log1p
+Test "log1p (-0.25)":
+float: 1
+ifloat: 1
+
+# log2
+Test "log2 (e)":
+ildouble: 1
+ldouble: 1
+
+# pow
+Test "pow (0x0.ffffffp0, -0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24)":
+float: 1
+ifloat: 1
+
+# pow10
+Test "pow10 (-1)":
+double: 1
+idouble: 1
+Test "pow10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "pow10 (-36)":
+double: 1
+idouble: 1
+Test "pow10 (3)":
+double: 1
+idouble: 1
+Test "pow10 (36)":
+double: 1
+idouble: 1
+
+# pow_downward
+Test "pow_downward (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_downward (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_towardzero
+Test "pow_towardzero (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_towardzero (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_upward
+Test "pow_upward (1.0625, 1.125)":
+float: 1
+ifloat: 1
+
+# sin_downward
+Test "sin_downward (1)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (10)":
+float: 1
+ifloat: 1
+Test "sin_downward (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sin_downward (4)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (5)":
+float: 1
+ifloat: 1
+Test "sin_downward (6)":
+float: 1
+ifloat: 1
+Test "sin_downward (8)":
+ildouble: 1
+ldouble: 1
+
+# sin_tonearest
+Test "sin_tonearest (1)":
+float: 1
+ifloat: 1
+
+# sin_towardzero
+Test "sin_towardzero (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_towardzero (10)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (4)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (5)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (8)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (9)":
+float: 1
+ifloat: 1
+
+# sin_upward
+Test "sin_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_upward (2)":
+float: 2
+ifloat: 2
+Test "sin_upward (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "sin_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (9)":
+float: 1
+ifloat: 1
+
+# sincos
+Test "sincos (0x1p+120) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (0x1p+127) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 2":
+double: 1
+idouble: 1
+Test "sincos (pi/6) extra output 2":
+float: 1
+ifloat: 1
+
+# sinh
+Test "sinh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# sinh_downward
+Test "sinh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_downward (23)":
+float: 1
+ifloat: 1
+Test "sinh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_towardzero
+Test "sinh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_towardzero (23)":
+float: 1
+ifloat: 1
+Test "sinh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_upward
+Test "sinh_upward (23)":
+ildouble: 1
+ldouble: 1
+Test "sinh_upward (24)":
+ildouble: 1
+ldouble: 1
+
+# tan
+Test "tan (-0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (pi/4)":
+ildouble: 1
+ldouble: 1
+
+# tan_downward
+Test "tan_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_downward (2)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (6)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (8)":
+float: 1
+ifloat: 1
+Test "tan_downward (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_tonearest
+Test "tan_tonearest (10)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (4)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (7)":
+ildouble: 1
+ldouble: 1
+
+# tan_towardzero
+Test "tan_towardzero (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_towardzero (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (5)":
+float: 1
+ifloat: 1
+Test "tan_towardzero (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (7)":
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_upward
+Test "tan_upward (1)":
+float: 1
+ifloat: 1
+Test "tan_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_upward (5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (7)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (9)":
+ildouble: 1
+ldouble: 1
+
+# tanh
+Test "tanh (-0.75)":
+ildouble: 1
+ldouble: 1
+Test "tanh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# tgamma
+Test "tgamma (-0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x0.fffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x0.ffffffp0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x1.000002p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x1.0a32a2p+5)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x1.fffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x13.ffffep0)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x13.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x14.000000000001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x14.00002p0)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1d.ffffep0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.fffffffffffffffffffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1e.00000000000000000000000008p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1e.000000000001p0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x1e.00002p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x2.0000000000002p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x2.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x27.fffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x27.fffffffffffffffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.000000000002p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.00004p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x29.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.00004p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.ffffcp0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.fffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2a.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x3.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-0x3.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x3.ffffffffffffep0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x31.fffffffffffep0)":
+double: 3
+idouble: 3
+Test "tgamma (-0x32.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x32.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x4.000008p0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x4.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.0000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x5.ffffffffffffcp0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x6.000008p0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x63.fffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x63.ffffffffffffffffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x64.0000000000000000000000002p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x64.000000000004p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.00000000000000000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.0000000000004p0)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x7.fffff8p0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+Test "tgamma (-0x7.ffffffffffffcp0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x8.00000000000000000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x9.ffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x9.fffffp0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x96.000000000008p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0xa.00001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-2.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-3.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-5.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (-7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-9.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0.5)":
+float: 1
+ifloat: 1
+Test "tgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x1.fffffep0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1.fffffffffffffp0)":
+double: 1
+idouble: 1
+Test "tgamma (0x1p-24)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1p-53)":
+double: 1
+idouble: 1
+Test "tgamma (0x2.30a43cp+4)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (0x2.fffffcp0)":
+float: 3
+ifloat: 3
+Test "tgamma (0x3.fffffcp0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x3.ffffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x5.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x5.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x5.fffff8p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x6.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x6.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x7.0000000000004p0)":
+double: 4
+idouble: 4
+Test "tgamma (0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x7.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (0x7.ffffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (0xa.b9fd72b0fb238p+4)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (0xa.b9fd72b0fb23a9ddbf0d3804f4p+4)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (10)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (18.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (19.5)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (2.5)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (23.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (29.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (3)":
+float: 1
+ifloat: 1
+Test "tgamma (3.5)":
+float: 2
+ifloat: 2
+Test "tgamma (30.5)":
+float: 1
+ifloat: 1
+Test "tgamma (33.5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (34.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (4)":
+float: 1
+ifloat: 1
+Test "tgamma (4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (6)":
+float: 1
+ifloat: 1
+Test "tgamma (6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (7)":
+double: 1
+idouble: 1
+Test "tgamma (7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (8)":
+double: 1
+idouble: 1
+Test "tgamma (8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (9)":
+double: 1
+idouble: 1
+Test "tgamma (9.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# y0
+Test "y0 (0.125)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+Test "y0 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-100)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-110)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-20)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-30)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-40)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p-50)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-70)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-80)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y0 (1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "y0 (10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# y1
+Test "y1 (0.125)":
+double: 1
+idouble: 1
+Test "y1 (0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "y1 (0x1.27e204p+99)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y1 (0x1p-20)":
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y1 (1.5)":
+float: 1
+ifloat: 1
+Test "y1 (10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+
+# yn
+Test "yn (0, 0.125)":
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "yn (0, 10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (1, 0.125)":
+double: 1
+idouble: 1
+Test "yn (1, 1.5)":
+float: 1
+ifloat: 1
+Test "yn (1, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "yn (10, 0.125)":
+double: 1
+idouble: 1
+Test "yn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (10, 1.0)":
+double: 1
+idouble: 1
+Test "yn (10, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (10, 2.0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "yn (3, 0.125)":
+double: 1
+idouble: 1
+Test "yn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (3, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (3, 2.0)":
+double: 1
+idouble: 1
+
+# Maximal error of functions:
+Function: "acos":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_upward":
+ildouble: 2
+ldouble: 2
+
+Function: "acosh":
+ildouble: 1
+ldouble: 1
+
+Function: "asin":
+ildouble: 2
+ldouble: 2
+
+Function: "asin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "asin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+float: 1
+ifloat: 1
+
+Function: "cabs":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "carg":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casinh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catan":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "clog":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "clog10":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_upward":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "cosh":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_upward":
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow":
+double: 2
+float: 4
+idouble: 2
+ifloat: 4
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "cpow":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Real part of "ctan_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Real part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctanh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctanh_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Imaginary part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Imaginary part of "ctanh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: "erf":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erfc":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp":
+ildouble: 1
+ldouble: 1
+
+Function: "exp10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "exp_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "gamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "hypot":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "j1":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "jn":
+double: 4
+float: 5
+idouble: 4
+ifloat: 5
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "log":
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+float: 1
+ifloat: 1
+
+Function: "log2":
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_upward":
+float: 1
+ifloat: 1
+
+Function: "sin":
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sin_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sin_upward":
+float: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "sincos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sinh":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_upward":
+ildouble: 1
+ldouble: 1
+
+Function: "tan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "tan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tan_upward":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh":
+ildouble: 1
+ldouble: 1
+
+Function: "tgamma":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "y0":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "y1":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "yn":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+# end of automatic generation
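For orientation, each entry above records the largest observed error for that input and type, measured in units in the last place (ulps). A minimal standalone sketch, not part of this patch, of counting the ulp distance between a computed and an expected double:

/* Count the representable doubles between a computed result and the
   expected value, i.e. the "ulps" recorded in the tables above.  */
#include <math.h>
#include <stdio.h>

static int
ulp_diff (double computed, double expected)
{
  int n = 0;
  /* Step one representable value at a time toward the target.  */
  while (computed != expected && n < 64)
    {
      computed = nextafter (computed, expected);
      n++;
    }
  return n;
}

int
main (void)
{
  double expected = 0.70710678118654752440;    /* ~ 1/sqrt(2) */
  double computed = nextafter (expected, 1.0); /* one ulp high */
  printf ("%d ulp(s)\n", ulp_diff (computed, expected));
  return 0;
}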
diff --git a/libc/sysdeps/powerpc/nofpu/shlib-versions b/libc/sysdeps/powerpc/nofpu/shlib-versions
new file mode 100644
index 000000000..72085ddf4
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/shlib-versions
@@ -0,0 +1 @@
+powerpc.*-.*-.* ABI powerpcsoft-@OS@
diff --git a/libc/sysdeps/powerpc/nofpu/sim-full.c b/libc/sysdeps/powerpc/nofpu/sim-full.c
new file mode 100644
index 000000000..e16703323
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/sim-full.c
@@ -0,0 +1,46 @@
+/* Software floating-point exception handling emulation.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <signal.h>
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+/* Global to store sticky exceptions. */
+int __sim_exceptions __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_exceptions);
+
+/* By default, no exceptions should trap. */
+int __sim_disabled_exceptions = 0xffffffff;
+libc_hidden_data_def (__sim_disabled_exceptions);
+
+int __sim_round_mode __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_round_mode);
+
+void
+__simulate_exceptions (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+}
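__simulate_exceptions implements sticky flags behind a per-process mask: bits accumulate in __sim_exceptions, and SIGFPE is delivered only for bits cleared in __sim_disabled_exceptions. A self-contained sketch of the same mechanism, using standalone names rather than the glibc symbols:

#include <signal.h>
#include <stdio.h>

static int sim_exceptions;                        /* sticky flags */
static int sim_disabled_exceptions = 0xffffffff;  /* all masked */

static void
simulate_exceptions (int x)
{
  sim_exceptions |= x;               /* accumulate; never cleared here */
  if (x & ~sim_disabled_exceptions)  /* only unmasked bits trap */
    raise (SIGFPE);
}

int
main (void)
{
  simulate_exceptions (0x1);        /* masked: flag recorded, no signal */
  sim_disabled_exceptions &= ~0x2;  /* unmask bit 0x2 */
  printf ("sticky flags: %#x\n", sim_exceptions);
  /* simulate_exceptions (0x2) would now deliver SIGFPE.  */
  return 0;
}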
diff --git a/libc/sysdeps/powerpc/nofpu/soft-supp.h b/libc/sysdeps/powerpc/nofpu/soft-supp.h
new file mode 100644
index 000000000..18b4550e3
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/soft-supp.h
@@ -0,0 +1,48 @@
+/* Internal support code for complete soft float.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+# include <fenv_libc.h>
+
+#else
+
+# include <fenv.h>
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
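fenv_union_t gives the soft-float code word-sized access to the otherwise opaque fenv_t. A minimal sketch of the same union-punning pattern, with a dummy two-word struct standing in for the real fenv_t:

#include <stdio.h>

typedef struct { unsigned int hi, lo; } fake_fenv_t;  /* stand-in type */

typedef union
{
  fake_fenv_t fenv;
  unsigned int l[2];
} fenv_union_t;

int
main (void)
{
  fenv_union_t u = { .fenv = { 0x80000000, 0x3 } };
  /* The nofpu code reads and writes the environment halves via l[].  */
  printf ("%#x %#x\n", u.l[0], u.l[1]);
  return 0;
}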
diff --git a/libc/sysdeps/powerpc/novmx-longjmp.c b/libc/sysdeps/powerpc/novmx-longjmp.c
index 8f6ea357d..b2c0e4cf5 100644
--- a/libc/sysdeps/powerpc/novmx-longjmp.c
+++ b/libc/sysdeps/powerpc/novmx-longjmp.c
@@ -50,13 +50,7 @@ weak_alias (__novmx__libc_siglongjmp, __novmx_longjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp)
-# if __WORDSIZE == 64
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.3);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.3);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.3);
-# else
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.0);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.0);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.0);
-# endif
+compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0);
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */
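This hunk drops the __WORDSIZE conditional because compat_symbol resolves the requested version against the architecture's ABI baseline itself. For reference, the usage pattern of the macro from glibc's <shlib-compat.h> (it compiles only inside the glibc build tree; the function name here is hypothetical):

#include <shlib-compat.h>

#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
/* Old entry point kept only for binaries linked against the
   GLIBC_2.0 symbol version.  */
int __old_frobnicate (int x) { return x + 1; }
compat_symbol (libc, __old_frobnicate, frobnicate, GLIBC_2_0);
#endif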
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcmp.S b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
new file mode 100644
index 000000000..2849461cd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
@@ -0,0 +1,128 @@
+/* Optimized memcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcmp
+
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+   Check 2 words from src1 and src2.  If unequal, jump to the end and
+   return src1 > src2 or src1 < src2.
+   If the count reaches zero, check the bytes remaining before the
+   counter ran out, then jump to the end and return src1 > src2,
+   src1 < src2 or src1 = src2.  Otherwise repeat.  */
+
+EALIGN (memcmp, 5, 0)
+ srwi. r6,r5,5
+ beq L(preword2_count_loop)
+ mtctr r6
+ clrlwi r5,r5,27
+
+L(word8_compare_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word8_compare_loop)
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_memcmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,0x01
+ lbz r8,0(r4)
+ addi r4,r4,0x01
+ cmplw cr5,r8,r6
+ bne cr5,L(st2)
+ bdnz L(byte_count_loop)
+
+L(end_memcmp):
+ addi r3,r0,0
+ blr
+
+L(l_r):
+ addi r3,r0,1
+ blr
+
+L(st1):
+ blt cr1,L(l_r)
+ addi r3,r0,-1
+ blr
+
+L(st2):
+ blt cr5,L(l_r)
+ addi r3,r0,-1
+ blr
+END (memcmp)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
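A rough C model of the strategy above (illustrative only; the assembly
unrolls the word loop four times and keeps two comparisons in flight
per iteration):

  #include <stddef.h>
  #include <stdint.h>
  #include <string.h>

  int
  memcmp_sketch (const void *s1, const void *s2, size_t n)
  {
    const unsigned char *a = s1, *b = s2;
    while (n >= 8)
      {
        uint32_t a0, a1, b0, b1;
        memcpy (&a0, a, 4); memcpy (&a1, a + 4, 4);
        memcpy (&b0, b, 4); memcpy (&b1, b + 4, 4);
        if (a0 != b0 || a1 != b1)
          break;                /* settle the difference bytewise */
        a += 8; b += 8; n -= 8;
      }
    for (; n > 0; n--, a++, b++)
      if (*a != *b)
        return *a < *b ? -1 : 1;
    return 0;
  }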
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcpy.S b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
new file mode 100644
index 000000000..b01d53920
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
@@ -0,0 +1,130 @@
+/* Optimized memcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcpy
+
+   r0:temp to hold the return value (destination address)
+ r3:destination address
+ r4:source address
+ r5:byte count
+
+   Save the return value (the destination address) in r0.
+   If destination and source are unaligned and the copy count is greater
+   than 256, copy 0-3 bytes first to make the destination aligned.
+   If there are 32 or more bytes to copy, use the 32-byte copy loop.
+   Finally, copy the 0-31 remaining bytes. */
+
+EALIGN (memcpy, 5, 0)
+/* Check whether there are more than 256 bytes to copy and whether
+   source and destination are unaligned */
+ cmpwi r5,0x0100
+ addi r0,r3,0
+ ble L(string_count_loop)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ neg r6,r4
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ mtctr r6
+ subf r5,r6,r5
+
+L(unaligned_bytecopy_loop): /* Align destination by copying 0-3 bytes */
+ lbz r8,0x0(r4)
+ addi r4,r4,1
+ stb r8,0x0(r3)
+ addi r3,r3,1
+ bdnz L(unaligned_bytecopy_loop)
+ srwi. r7,r5,5
+ beq L(preword2_count_loop)
+ mtctr r7
+
+L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,0(r4)
+ lwz r7,4(r4)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ subi r5,r5,0x20
+ stw r6,0(r3)
+ stw r7,4(r3)
+ stw r8,8(r3)
+ stw r9,12(r3)
+ lwz r6,16(r4)
+ lwz r7,20(r4)
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ stw r6,16(r3)
+ stw r7,20(r3)
+ stw r8,24(r3)
+ stw r9,28(r3)
+ addi r3,r3,0x20
+ bdnz L(word8_count_loop_no_dcbt)
+
+L(preword2_count_loop): /* Copy remaining 0-31 bytes */
+ clrlwi. r12,r5,27
+ beq L(end_memcpy)
+ mtxer r12
+ lswx r5,0,r4
+ stswx r5,0,r3
+ mr r3,r0
+ blr
+
+L(string_count_loop): /* Copy odd 0-31 bytes */
+ clrlwi. r12,r5,28
+ add r3,r3,r5
+ add r4,r4,r5
+ beq L(pre_string_copy)
+ mtxer r12
+ subf r4,r12,r4
+ subf r3,r12,r3
+ lswx r6,0,r4
+ stswx r6,0,r3
+
+L(pre_string_copy): /* Check how many 16-byte chunks to copy */
+ srwi. r7,r5,4
+ beq L(end_memcpy)
+ mtctr r7
+
+L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdz L(end_memcpy)
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdnz L(word4_count_loop_no_dcbt)
+
+L(end_memcpy):
+ mr r3,r0
+ blr
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
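The same flow in rough C (illustrative; the assembly copies each
32-byte block as eight word loads/stores and finishes the 0-31 byte
tail with lswx/stswx string instructions):

  #include <stddef.h>
  #include <stdint.h>

  void *
  memcpy_sketch (void *dst, const void *src, size_t n)
  {
    unsigned char *d = dst;
    const unsigned char *s = src;
    /* Align the destination only for large copies.  */
    if (n > 256)
      while (((uintptr_t) d & 3) != 0 && n > 0)
        { *d++ = *s++; n--; }
    while (n >= 32)             /* 32-byte blocks */
      {
        for (int i = 0; i < 32; i++)
          d[i] = s[i];
        d += 32; s += 32; n -= 32;
      }
    while (n > 0)               /* 0-31 byte tail */
      { *d++ = *s++; n--; }
    return dst;
  }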
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memset.S b/libc/sysdeps/powerpc/powerpc32/405/memset.S
new file mode 100644
index 000000000..b73dba887
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+ r3:destination address and return address
+ r4:source integer to copy
+ r5:byte count
+   r11:source integer replicated into all 32 bits of the reg
+   r12:temp to hold the return value (destination address)
+
+   Save the return value (the destination address) in r12.
+   If the destination is unaligned and the count is greater than 255
+   bytes, set 0-3 bytes first to make the destination aligned.
+   If the count is greater than 255 bytes and memory is being set to
+   zero, use dcbz to set memory where we can;
+   otherwise do the following:
+   If there are 16 or more words to set, use the 16-word set loop.
+   Finally, set the 0-15 remaining bytes with a string store. */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,27
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,5
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x20
+ subi r5,r5,0x20
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
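The rlwinm/rlwimi sequence at the top replicates the fill byte into all
four bytes of a word; in C terms (illustrative sketch):

  #include <stdint.h>

  static inline uint32_t
  replicate_byte (unsigned char c)
  {
    uint32_t w = c;             /* rlwinm r11,r4,0,24,31  */
    w |= w << 8;                /* rlwimi r11,r4,8,16,23  */
    w |= w << 16;               /* rlwimi r11,r11,16,0,15 */
    return w;
  }

When the fill value is zero and the count is large, the code instead
clears whole 32-byte cachelines with dcbz after reaching a cacheline
boundary.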
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcmp.S b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
new file mode 100644
index 000000000..c0b21907b
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
@@ -0,0 +1,134 @@
+/* Optimized strcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+
+ Implementation description
+   Check 2 words from src1 and src2.  If unequal, jump to the end and
+   return src1 > src2 or src1 < src2.
+   If a null is found, check the bytes before the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If src1 = src2 and no null, repeat.  */
+
+EALIGN (strcmp,5,0)
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,5
+ beq L(byte_loop)
+ srwi. r8,r8,5
+ beq L(byte_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(big_loop)
+ mtctr r8
+
+L(big_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(big_loop)
+ b L(byte_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r5,r5,r12
+ srw r8,r8,r12
+ cmplw r5,r8
+ bne L(st1)
+ b L(end_strcmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r5,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strcmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+
+L(byte_loop):
+ lbz r5,0(r3)
+ addi r3,r3,1
+ lbz r6,0(r4)
+ addi r4,r4,1
+ cmplw r5,r6
+ bne L(st1)
+ cmpwi r5,0
+ beq L(end_strcmp)
+ b L(byte_loop)
+END (strcmp)
+libc_hidden_builtin_def (strcmp)
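dlmzb ("determine leftmost zero byte") is the 405/440/476 primitive
these routines rely on: it scans the eight bytes of a register pair,
and its record form sets CR0 according to whether a zero byte was
found.  A C model consistent with its use in this file and in strlen
below (illustrative, big-endian byte order):

  #include <stdint.h>

  /* Return the number of bytes up to and including the leftmost zero
     byte of hi||lo, or 8 if there is none; *found says which.  */
  static int
  dlmzb_model (uint32_t hi, uint32_t lo, int *found)
  {
    uint64_t v = ((uint64_t) hi << 32) | lo;
    for (int i = 0; i < 8; i++)
      if (((v >> (56 - 8 * i)) & 0xff) == 0)
        { *found = 1; return i + 1; }
    *found = 0;
    return 8;
  }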
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcpy.S b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
new file mode 100644
index 000000000..d7c84569d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
@@ -0,0 +1,107 @@
+/* Optimized strcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcpy
+
+ Register Use
+ r3:destination and return address
+ r4:source address
+ r10:temp destination address
+
+ Implementation description
+   Loop checking 2 words at a time with dlmzb for a null byte in the
+   2 words.  If there is a null, jump to the end checking to determine
+   where in the last 8 bytes it is, then copy the appropriate bytes of
+   the last 8 according to the null position.  */
+
+EALIGN (strcpy, 5, 0)
+ neg r7,r4
+ subi r4,r4,1
+ clrlwi. r8,r7,29
+ subi r10,r3,1
+ beq L(pre_word8_loop)
+ mtctr r8
+
+L(loop):
+ lbzu r5,0x01(r4)
+ cmpi cr5,r5,0x0
+ stbu r5,0x01(r10)
+ beq cr5,L(end_strcpy)
+ bdnz L(loop)
+
+L(pre_word8_loop):
+ subi r4,r4,3
+ subi r10,r10,3
+
+L(word8_loop):
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ b L(word8_loop)
+
+L(last_bytes_copy):
+ stwu r5,0x04(r10)
+ subi r11,r11,4
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,1
+
+L(last_bytes_copy_loop):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop)
+ blr
+
+L(byte_copy):
+ blt L(last_bytes_copy)
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,5
+
+L(last_bytes_copy_loop2):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop2)
+
+L(end_strcpy):
+ blr
+END (strcpy)
+libc_hidden_builtin_def (strcpy)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strlen.S b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
new file mode 100644
index 000000000..77d22ea67
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
@@ -0,0 +1,75 @@
+/* Optimized strlen implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strlen
+
+ Register Use
+ r3:source address and return length of string
+ r4:byte counter
+
+ Implementation description
+   Load 2 words at a time and count bytes; if we find a null, subtract
+   one from the count and return the count value.  We subtract one
+   because the null character is not counted as a byte. */
+
+EALIGN (strlen,5,0)
+ neg r7,r3
+ clrlwi. r8,r7,29
+ addi r4,0,0
+ beq L(byte_count_loop)
+ mtctr r8
+
+L(loop):
+ lbz r5,0(r3)
+ cmpi cr5,r5,0x0
+ addi r3,r3,0x1
+ addi r4,r4,0x1
+ beq cr5,L(end_strlen)
+ bdnz L(loop)
+
+L(byte_count_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ b L(byte_count_loop)
+
+L(end_strlen):
+ addi r3,r4,-1
+ blr
+END (strlen)
+libc_hidden_builtin_def (strlen)
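For reference, the zero-byte test that dlmzb provides in one
instruction has a well-known portable equivalent (illustrative C; the
hardware additionally reports the byte's position):

  #include <stdint.h>

  /* Nonzero iff the word contains a zero byte.  */
  static inline int
  has_zero_byte (uint32_t v)
  {
    return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
  }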
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strncmp.S b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
new file mode 100644
index 000000000..3e2ba5f85
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
@@ -0,0 +1,128 @@
+/* Optimized strncmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strncmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+ Implementation description
+ Touch in 3 lines of D-cache.
+   If source1 or source2 is unaligned, copy 0-3 bytes to make source1
+   aligned.  Check 2 words from src1 and src2.  If unequal, jump to
+   the end and return src1 > src2 or src1 < src2.
+   If a null is found, check the bytes before the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If the count reaches zero, check the bytes remaining before the
+   counter ran out, then jump to the end and return src1 > src2,
+   src1 < src2 or src1 = src2.  If src1 = src2 and no null, repeat. */
+
+EALIGN (strncmp,5,0)
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,3
+ beq L(prebyte_count_loop)
+ srwi. r8,r8,3
+ beq L(prebyte_count_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(preword2_count_loop)
+ mtctr r8
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mfctr r7
+ cmplw r6,r7
+ bgt L(set_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ dlmzb. r12,r10,r6
+ bne L(end_check)
+ cmplw r10,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_strncmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,1
+ lbz r7,0(r4)
+ addi r4,r4,1
+ cmplw r6,r7
+ bne L(st1)
+ cmpwi r6,0
+ beq L(end_strncmp)
+ bdnz L(byte_count_loop)
+ b L(end_strncmp)
+
+L(set_count_loop):
+ slwi r7,r7,3
+ subf r5,r7,r5
+ b L(word2_count_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r10,r10,r12
+ srw r8,r8,r12
+ cmplw r10,r8
+ bne L(st1)
+ b L(end_strncmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r10,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strncmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+END (strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/440/Implies b/libc/sysdeps/powerpc/powerpc32/440/Implies
new file mode 100644
index 000000000..70c0d2eda
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/libc/sysdeps/powerpc/powerpc32/464/Implies b/libc/sysdeps/powerpc/powerpc32/464/Implies
new file mode 100644
index 000000000..c3e52c550
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/libc/sysdeps/powerpc/powerpc32/476/Implies b/libc/sysdeps/powerpc/powerpc32/476/Implies
new file mode 100644
index 000000000..2829f9cca
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/libc/sysdeps/powerpc/powerpc32/476/memset.S b/libc/sysdeps/powerpc/powerpc32/476/memset.S
new file mode 100644
index 000000000..48c21d620
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC476 (128-byte cacheline).
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+ r3:destination address and return address
+ r4:source integer to copy
+ r5:byte count
+   r11:source integer replicated into all 32 bits of the reg
+   r12:temp to hold the return value (destination address)
+
+   Save the return value (the destination address) in r12.
+   If the destination is unaligned and the count is greater than 255
+   bytes, set 0-3 bytes first to make the destination aligned.
+   If the count is greater than 255 bytes and memory is being set to
+   zero, use dcbz to set memory where we can;
+   otherwise do the following:
+   If there are 16 or more words to set, use the 16-word set loop.
+   Finally, set the 0-15 remaining bytes with a string store. */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,25
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,7
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x80
+ subi r5,r5,0x80
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
diff --git a/libc/sysdeps/powerpc/powerpc32/Makefile b/libc/sysdeps/powerpc/powerpc32/Makefile
index 64f79003a..cf620c826 100644
--- a/libc/sysdeps/powerpc/powerpc32/Makefile
+++ b/libc/sysdeps/powerpc/powerpc32/Makefile
@@ -1,8 +1,12 @@
# Powerpc32 specific build options.
-ifeq ($(with-fp),no)
-+cflags += -msoft-float
-sysdep-LDFLAGS += -msoft-float
+# Some Powerpc32 variants assume soft-fp is the default even though there is
+# an fp variant, so provide -mhard-float if --with-fp is explicitly passed.
+
+ifeq ($(with-fp),yes)
++cflags += -mhard-float
+ASFLAGS += -mhard-float
+sysdep-LDFLAGS += -mhard-float
endif
ifeq ($(subdir),gmon)
diff --git a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
index 787447363..df1d5195f 100644
--- a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
@@ -24,6 +24,12 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define LOAD_GP(N) evldd r##N,((JB_FPRS+((N)-14)*2)*4)(r3)
+#else
+# define LOAD_GP(N) lwz r##N,((JB_GPRS+(N)-14)*4)(r3)
+#endif
+
ENTRY (__longjmp)
#if defined PTR_DEMANGLE || defined CHECK_SP
@@ -39,13 +45,13 @@ ENTRY (__longjmp)
lwz r1,(JB_GPR1*4)(r3)
#endif
lwz r0,(JB_LR*4)(r3)
- lwz r14,((JB_GPRS+0)*4)(r3)
- lwz r15,((JB_GPRS+1)*4)(r3)
- lwz r16,((JB_GPRS+2)*4)(r3)
- lwz r17,((JB_GPRS+3)*4)(r3)
- lwz r18,((JB_GPRS+4)*4)(r3)
- lwz r19,((JB_GPRS+5)*4)(r3)
- lwz r20,((JB_GPRS+6)*4)(r3)
+ LOAD_GP (14)
+ LOAD_GP (15)
+ LOAD_GP (16)
+ LOAD_GP (17)
+ LOAD_GP (18)
+ LOAD_GP (19)
+ LOAD_GP (20)
#ifdef PTR_DEMANGLE
# ifndef CHECK_SP
PTR_DEMANGLE3 (r1, r24, r25)
@@ -53,19 +59,19 @@ ENTRY (__longjmp)
PTR_DEMANGLE2 (r0, r25)
#endif
mtlr r0
- lwz r21,((JB_GPRS+7)*4)(r3)
- lwz r22,((JB_GPRS+8)*4)(r3)
+ LOAD_GP (21)
+ LOAD_GP (22)
lwz r0,(JB_CR*4)(r3)
- lwz r23,((JB_GPRS+9)*4)(r3)
- lwz r24,((JB_GPRS+10)*4)(r3)
- lwz r25,((JB_GPRS+11)*4)(r3)
+ LOAD_GP (23)
+ LOAD_GP (24)
+ LOAD_GP (25)
mtcrf 0xFF,r0
- lwz r26,((JB_GPRS+12)*4)(r3)
- lwz r27,((JB_GPRS+13)*4)(r3)
- lwz r28,((JB_GPRS+14)*4)(r3)
- lwz r29,((JB_GPRS+15)*4)(r3)
- lwz r30,((JB_GPRS+16)*4)(r3)
- lwz r31,((JB_GPRS+17)*4)(r3)
+ LOAD_GP (26)
+ LOAD_GP (27)
+ LOAD_GP (28)
+ LOAD_GP (29)
+ LOAD_GP (30)
+ LOAD_GP (31)
mr r3,r4
blr
END (__longjmp)
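For example, in the SPE/no-FPRS configuration LOAD_GP (14) expands to

  evldd r14,((JB_FPRS+((14)-14)*2)*4)(r3)

restoring the full 64-bit register (SPE GPRs are 64 bits wide and are
saved in the jump-buffer slots otherwise used for FPRs), while the
default expansion

  lwz r14,((JB_GPRS+(14)-14)*4)(r3)

reloads only the low 32 bits from the ordinary GPR save area.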
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
index 95e8a5aa1..ad2b5ffdb 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
@@ -30,7 +30,7 @@ libc_hidden_def (_setjmp)
/* Build a versioned object for libc. */
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0);
ENTRY (__novmx_setjmp)
li r4,0 /* Set second argument to 0. */
@@ -39,7 +39,7 @@ END (__novmx_setjmp)
libc_hidden_def (__novmx_setjmp)
# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined
as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c
if HAVE_CLEANUP_JMP_BUF is defined */
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
index 1113ea533..5e1e860d8 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
@@ -26,7 +26,7 @@ ENTRY (__novmxsetjmp)
b __novmx__sigsetjmp@local
END (__novmxsetjmp)
strong_alias (__novmxsetjmp, __novmx__setjmp)
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.0)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0)
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */
@@ -36,4 +36,4 @@ ENTRY (__vmxsetjmp)
END (__vmxsetjmp)
strong_alias (__vmxsetjmp, __vmx__setjmp)
strong_alias (__vmx__setjmp, __setjmp)
-default_symbol_version (__vmxsetjmp,setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
diff --git a/libc/sysdeps/powerpc/powerpc32/dl-machine.c b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
index 3e7202d86..aba361856 100644
--- a/libc/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -416,6 +416,12 @@ __process_machine_rela (struct link_map *map,
Elf32_Addr const finaladdr,
int rinfo)
{
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ } __attribute__((__packed__));
+
switch (rinfo)
{
case R_PPC_NONE:
@@ -432,10 +438,7 @@ __process_machine_rela (struct link_map *map,
return;
case R_PPC_UADDR32:
- ((char *) reloc_addr)[0] = finaladdr >> 24;
- ((char *) reloc_addr)[1] = finaladdr >> 16;
- ((char *) reloc_addr)[2] = finaladdr >> 8;
- ((char *) reloc_addr)[3] = finaladdr;
+ ((union unaligned *) reloc_addr)->u4 = finaladdr;
break;
case R_PPC_ADDR24:
@@ -453,8 +456,7 @@ __process_machine_rela (struct link_map *map,
case R_PPC_UADDR16:
if (__builtin_expect (finaladdr > 0x7fff && finaladdr < 0xffff8000, 0))
_dl_reloc_overflow (map, "R_PPC_UADDR16", reloc_addr, refsym);
- ((char *) reloc_addr)[0] = finaladdr >> 8;
- ((char *) reloc_addr)[1] = finaladdr;
+ ((union unaligned *) reloc_addr)->u2 = finaladdr;
break;
case R_PPC_ADDR16_LO:
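The packed union is the standard idiom for telling GCC that a store may
be unaligned, letting the compiler emit the target's unaligned-safe
store sequence instead of the old explicit byte-by-byte shifts.  In
isolation (illustrative sketch):

  #include <stdint.h>

  union unaligned
  {
    uint16_t u2;
    uint32_t u4;
  } __attribute__ ((__packed__));

  static void
  store_unaligned_u4 (void *addr, uint32_t value)
  {
    ((union unaligned *) addr)->u4 = value;   /* as in R_PPC_UADDR32 */
  }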
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
new file mode 100644
index 000000000..adf556870
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
@@ -0,0 +1,9 @@
+ifeq ($(subdir),math)
+libm-routines += fexcepts_to_spe fexcepts_from_spe
+libm-routines += fexcepts_to_prctl fexcepts_from_prctl
+libm-routines += fe_note_change
+endif
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += fraiseexcept-soft
+endif
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..92a7dd1e0
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
@@ -0,0 +1,53 @@
+/* Clear given exceptions in current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef feclearexcept
+int
+__feclearexcept (int excepts)
+{
+ unsigned int fpescr;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ /* Clear the relevant bits. */
+ fpescr &= ~excepts_spe;
+
+ /* Put the new state in effect. */
+ fesetenv_register (fpescr);
+
+ /* Let the kernel know if the "invalid" or "underflow" bit was
+ cleared. */
+ if (excepts & (FE_INVALID | FE_UNDERFLOW))
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
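Standard <fenv.h> usage of the function above (illustrative):

  #include <fenv.h>

  /* Clear stale flags, do some work, then test the result.  */
  feclearexcept (FE_OVERFLOW | FE_UNDERFLOW);
  /* ... floating-point computation ... */
  if (fetestexcept (FE_OVERFLOW))
    ;  /* handle the overflow */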
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
new file mode 100644
index 000000000..43a570626
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
@@ -0,0 +1,39 @@
+/* Note a change to floating-point exceptions.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+/* Inform the kernel of a change to floating-point exceptions. */
+
+void
+__fe_note_change (void)
+{
+ int pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return;
+ if ((pflags & PR_FP_EXC_SW_ENABLE) == 0)
+ INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+}
+
+libm_hidden_def (__fe_note_change)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..7cc963c01
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
@@ -0,0 +1,54 @@
+/* Disable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fedisableexcept (int excepts)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags &= ~__fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If disabling signals for "inexact", also disable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr &= ~SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..133dde7b3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
@@ -0,0 +1,54 @@
+/* Enable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feenableexcept (int excepts)
+{
+ unsigned int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags |= __fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If enabling signals for "inexact", also enable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr |= SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
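feenableexcept and fedisableexcept are GNU extensions; on e500 they
work by rewriting the prctl trap mask as above.  Illustrative usage:

  #define _GNU_SOURCE
  #include <fenv.h>

  /* Make division by zero and invalid operations deliver SIGFPE;
     the return value is the previously enabled set, or -1 on error.  */
  int old = feenableexcept (FE_DIVBYZERO | FE_INVALID);
  if (old == -1)
    ;  /* the prctl call failed */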
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
new file mode 100644
index 000000000..bfcbca2ad
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
@@ -0,0 +1,47 @@
+/* Store current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
new file mode 100644
index 000000000..9c7afc74f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
@@ -0,0 +1,36 @@
+/* Get floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fegetexcept (void)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ result = __fexcepts_from_prctl (pflags);
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
new file mode 100644
index 000000000..f69e9a5bd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
@@ -0,0 +1,29 @@
+/* Return current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef fegetround
+int
+fegetround (void)
+{
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ return fpescr & 3;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..bd05ebd3c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
@@ -0,0 +1,57 @@
+/* Store current floating-point environment and clear exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ /* Get the current state. */
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Clear everything except for the rounding mode and trapping to the
+ kernel. */
+ u.l[0] &= ~(PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV);
+ u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE);
+
+ /* Put the new state in effect. */
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
new file mode 100644
index 000000000..3a85f1810
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
@@ -0,0 +1,41 @@
+/* Constant floating-point environments for e500.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The use of "unsigned long long" as the type to define the
+ bit-pattern explicitly, rather than the type "double" used in
+ <bits/fenv.h>, means that we cannot include <fenv_libc.h> here to
+ get the enum constants for the SPEFSCR bits to enable
+ exceptions. */
+
+#include <sys/prctl.h>
+
+/* If the default argument is used we use this value. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+ 0x3cULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+ (((unsigned long long) (PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV)) << 32) | 0x7cULL;
+
+/* Non-IEEE mode. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+ 0x0ULL;
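Decoded against the SPEFSCR bit definitions in this directory's
fenv_libc.h (the high word holds the prctl flags, the low word the
SPEFSCR image, matching the fenv_union_t layout used by fegetenv):

  0x3c == SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE
  0x7c == 0x3c | SPEFSCR_FINXE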
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
new file mode 100644
index 000000000..96375808d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
@@ -0,0 +1,96 @@
+/* Internal libc stuff for floating point environment routines. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+#include <fenv.h>
+
+int __feraiseexcept_spe (int);
+libm_hidden_proto (__feraiseexcept_spe)
+
+int __fexcepts_to_spe (int);
+libm_hidden_proto (__fexcepts_to_spe)
+
+int __fexcepts_from_spe (int);
+libm_hidden_proto (__fexcepts_from_spe)
+
+int __fexcepts_to_prctl (int);
+libm_hidden_proto (__fexcepts_to_prctl)
+
+int __fexcepts_from_prctl (int);
+libm_hidden_proto (__fexcepts_from_prctl)
+
+void __fe_note_change (void);
+libm_hidden_proto (__fe_note_change)
+
+/* Equivalent to fegetenv, but returns an unsigned int instead of
+ taking a pointer. */
+#define fegetenv_register() \
+ ({ unsigned int fscr; asm volatile ("mfspefscr %0" : "=r" (fscr)); fscr; })
+
+/* Equivalent to fesetenv, but takes an unsigned int instead of a
+ pointer. */
+#define fesetenv_register(fscr) \
+ ({ asm volatile ("mtspefscr %0" : : "r" (fscr)); })
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+/* Definitions of all the SPEFSCR bit numbers. */
+enum {
+ SPEFSCR_SOVH = 0x80000000,
+ SPEFSCR_OVH = 0x40000000,
+ SPEFSCR_FGH = 0x20000000,
+ SPEFSCR_FXH = 0x10000000,
+ SPEFSCR_FINVH = 0x08000000,
+ SPEFSCR_FDBZH = 0x04000000,
+ SPEFSCR_FUNFH = 0x02000000,
+ SPEFSCR_FOVFH = 0x01000000,
+ /* 2 unused bits. */
+ SPEFSCR_FINXS = 0x00200000,
+ SPEFSCR_FINVS = 0x00100000,
+ SPEFSCR_FDBZS = 0x00080000,
+ SPEFSCR_FUNFS = 0x00040000,
+ SPEFSCR_FOVFS = 0x00020000,
+ /* Combination of the exception bits. */
+ SPEFSCR_ALL_EXCEPT = 0x003e0000,
+ SPEFSCR_MODE = 0x00010000,
+ SPEFSCR_SOV = 0x00008000,
+ SPEFSCR_OV = 0x00004000,
+ SPEFSCR_FG = 0x00002000,
+ SPEFSCR_FX = 0x00001000,
+ SPEFSCR_FINV = 0x00000800,
+ SPEFSCR_FDBZ = 0x00000400,
+ SPEFSCR_FUNF = 0x00000200,
+ SPEFSCR_FOVF = 0x00000100,
+ /* 1 unused bit. */
+ SPEFSCR_FINXE = 0x00000040,
+ SPEFSCR_FINVE = 0x00000020,
+ SPEFSCR_FDBZE = 0x00000010,
+ SPEFSCR_FUNFE = 0x00000008,
+ SPEFSCR_FOVFE = 0x00000004,
+ /* Combination of the exception trap enable bits. */
+ SPEFSCR_ALL_EXCEPT_ENABLE = 0x0000007c,
+ SPEFSCR_FRMC = 0x00000003
+};
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
new file mode 100644
index 000000000..411e6be8d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
@@ -0,0 +1,49 @@
+/* Install given floating-point environment. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ u.fenv = *envp;
+
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
new file mode 100644
index 000000000..805008e0c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
@@ -0,0 +1,35 @@
+/* Set current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fesetround (int round)
+{
+ unsigned long fpescr;
+
+ if ((unsigned int) round > 3)
+ return 1;
+
+ fpescr = fegetenv_register ();
+ fpescr = (fpescr & ~SPEFSCR_FRMC) | (round & 3);
+ fesetenv_register (fpescr);
+
+ return 0;
+}
+libm_hidden_def (fesetround)
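Since the SPEFSCR keeps the rounding mode in the two-bit FRMC field,
fegetround and fesetround reduce to reading and rewriting those bits
(hence the "> 3" validity check).  Illustrative usage:

  #include <fenv.h>

  int save = fegetround ();
  if (fesetround (FE_TOWARDZERO) == 0)
    {
      /* ... truncating computations ... */
      fesetround (save);
    }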
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
new file mode 100644
index 000000000..505c92363
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
@@ -0,0 +1,47 @@
+/* Install given floating-point environment and raise exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int exc;
+
+ /* Save the currently set exceptions. */
+ exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT;
+
+ /* Install new environment. */
+ fesetenv (envp);
+
+ /* Raise (if appropriate) saved exceptions. */
+ __feraiseexcept_spe (exc);
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
new file mode 100644
index 000000000..c094a04cb
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions from prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from prctl bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & PR_FP_EXC_OVF)
+ result |= FE_OVERFLOW;
+ if (excepts & PR_FP_EXC_UND)
+ result |= FE_UNDERFLOW;
+ if (excepts & PR_FP_EXC_INV)
+ result |= FE_INVALID;
+ if (excepts & PR_FP_EXC_DIV)
+ result |= FE_DIVBYZERO;
+ if (excepts & PR_FP_EXC_RES)
+ result |= FE_INEXACT;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
new file mode 100644
index 000000000..3ec939d18
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions from SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from SPEFSCR bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & SPEFSCR_FINXS)
+ result |= FE_INEXACT;
+ if (excepts & SPEFSCR_FDBZS)
+ result |= FE_DIVBYZERO;
+ if (excepts & SPEFSCR_FUNFS)
+ result |= FE_UNDERFLOW;
+ if (excepts & SPEFSCR_FOVFS)
+ result |= FE_OVERFLOW;
+ if (excepts & SPEFSCR_FINVS)
+ result |= FE_INVALID;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_spe)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
new file mode 100644
index 000000000..b9c51b125
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions to prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from FE_* form to prctl bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= PR_FP_EXC_RES;
+ if (excepts & FE_DIVBYZERO)
+ result |= PR_FP_EXC_DIV;
+ if (excepts & FE_UNDERFLOW)
+ result |= PR_FP_EXC_UND;
+ if (excepts & FE_OVERFLOW)
+ result |= PR_FP_EXC_OVF;
+ if (excepts & FE_INVALID)
+ result |= PR_FP_EXC_INV;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
new file mode 100644
index 000000000..570934d15
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions to SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from FE_* form to SPEFSCR bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= SPEFSCR_FINXS;
+ if (excepts & FE_DIVBYZERO)
+ result |= SPEFSCR_FDBZS;
+ if (excepts & FE_UNDERFLOW)
+ result |= SPEFSCR_FUNFS;
+ if (excepts & FE_OVERFLOW)
+ result |= SPEFSCR_FOVFS;
+ if (excepts & FE_INVALID)
+ result |= SPEFSCR_FINVS;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_spe)
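
The four converters above map the same five exception conditions between the FE_*, SPEFSCR, and prctl bit layouts, so each to/from pair should compose to the identity on the supported bits. A sketch of that invariant as a test helper, not part of the patch; the prototypes are repeated so the fragment is self-contained.

#include <fenv.h>

int __fexcepts_to_spe (int);
int __fexcepts_from_spe (int);
int __fexcepts_to_prctl (int);
int __fexcepts_from_prctl (int);

/* Converting a mask of the five supported exceptions to the foreign
   representation and back must reproduce the original mask.  */
static int
conversions_roundtrip (int fe_mask)
{
  fe_mask &= (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW
              | FE_OVERFLOW | FE_INVALID);
  return (__fexcepts_from_spe (__fexcepts_to_spe (fe_mask)) == fe_mask
          && __fexcepts_from_prctl (__fexcepts_to_prctl (fe_mask))
             == fe_mask);
}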
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..b01cadeff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
@@ -0,0 +1,41 @@
+/* Store current representation for exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ unsigned long fpescr;
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ *flagp = fpescr & SPEFSCR_ALL_EXCEPT;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
new file mode 100644
index 000000000..0aed72ff3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
@@ -0,0 +1,28 @@
+/* Raise given exceptions. e500 version for use from soft-fp.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2004.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <libc-symbols.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_soft
+#include "spe-raise.c"
+libc_hidden_def (__feraiseexcept_soft)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..0eca9ffff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
@@ -0,0 +1,40 @@
+/* Raise given exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_spe
+#include "spe-raise.c"
+
+libm_hidden_def (__feraiseexcept_spe)
+
+#undef feraiseexcept
+int
+__feraiseexcept (int excepts)
+{
+ return __feraiseexcept_spe (__fexcepts_to_spe (excepts));
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..43f2d19d1
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
@@ -0,0 +1,55 @@
+/* Set floating-point environment exception handling. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ unsigned long old_spefscr, spefscr;
+ fexcept_t flag;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ old_spefscr = fegetenv_register ();
+
+ /* Ignore exceptions not listed in 'excepts'. */
+ flag = *flagp & excepts_spe;
+
+  /* Replace the exception status. */
+ spefscr = (old_spefscr & ~excepts_spe) | flag;
+
+ /* Store the new status word (along with the rest of the environment). */
+ fesetenv_register (spefscr);
+
+ /* If the state of the "invalid" or "underflow" flag has changed,
+ inform the kernel. */
+ if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0)
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
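
Together with __fegetexceptflag earlier in this patch, this implements the standard snapshot/restore idiom for exception flags. A minimal sketch of a caller, using the public C99 names; illustrative only.

#include <fenv.h>

/* Run FN with the exception flags snapshotted and restored around it.  */
void
with_flags_preserved (void (*fn) (void))
{
  fexcept_t saved;
  fegetexceptflag (&saved, FE_ALL_EXCEPT);  /* snapshot the flags */
  fn ();                                    /* may set or clear flags */
  fesetexceptflag (&saved, FE_ALL_EXCEPT);  /* restore the snapshot */
}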
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f4f547d5f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
@@ -0,0 +1,31 @@
+/* Test exception in current environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fetestexcept (int excepts)
+{
+ unsigned long f;
+
+ /* Get the current state. */
+ f = fegetenv_register ();
+
+ return __fexcepts_from_spe (f) & excepts;
+}
+libm_hidden_def (fetestexcept)
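
A typical caller clears the flags, computes, then uses fetestexcept to see what was raised. A sketch in standard C99; checked_log and its error convention are illustrative, and strictly conforming code would also use the FENV_ACCESS pragma.

#include <fenv.h>
#include <math.h>

double
checked_log (double x)
{
  feclearexcept (FE_ALL_EXCEPT);
  double r = log (x);
  if (fetestexcept (FE_INVALID | FE_DIVBYZERO))
    return -HUGE_VAL;  /* x was zero or negative; signal an error */
  return r;
}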
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..117e7331e
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
@@ -0,0 +1,4 @@
+/* The generic version of get-rounding-mode.h using fpu_control.h, not
+ the one using the software rounding mode, is correct for e500. */
+
+#include <sysdeps/generic/get-rounding-mode.h>
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
new file mode 100644
index 000000000..823f748ba
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
@@ -0,0 +1,27 @@
+/* Floating-point absolute value. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ENTRY (__fabsf)
+/* float [r3] fabsf (float [r3] x) ; */
+ efsabs r3,r3
+ blr
+END (__fabsf)
+
+weak_alias (__fabsf, fabsf)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
new file mode 100644
index 000000000..4394ddc7c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
@@ -0,0 +1,53 @@
+/* Raise given exceptions, given the SPEFSCR bits for those exceptions.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__FERAISEEXCEPT_INTERNAL (int excepts)
+{
+ unsigned long f;
+
+ f = fegetenv_register ();
+ f |= (excepts & SPEFSCR_ALL_EXCEPT);
+ fesetenv_register (f);
+
+ /* Force the operations that cause the exceptions. */
+ if ((SPEFSCR_FINVS & excepts) != 0)
+ /* 0 / 0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (0), "r" (0));
+
+ if ((SPEFSCR_FDBZS & excepts) != 0)
+ /* 1.0 / 0.0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (1.0F), "r" (0));
+
+ if ((SPEFSCR_FOVFS & excepts) != 0)
+ /* Largest normalized number plus itself. */
+ asm volatile ("efsadd %0,%0,%1" : : "r" (0x7f7fffff), "r" (0x7f7fffff));
+
+ if ((SPEFSCR_FUNFS & excepts) != 0)
+ /* Smallest normalized number times itself. */
+ asm volatile ("efsmul %0,%0,%1" : : "r" (0x800000), "r" (0x800000));
+
+ if ((SPEFSCR_FINXS & excepts) != 0)
+ /* Smallest normalized minus 1.0 raises the inexact flag. */
+ asm volatile ("efssub %0,%0,%1" : : "r" (0x00800000), "r" (1.0F));
+
+ /* Success. */
+ return 0;
+}
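
Besides setting the SPEFSCR status bits directly, the routine forces arithmetic that raises each requested exception so that enabled traps actually fire. The same technique in portable C looks roughly like this; a sketch assuming IEEE 754 single precision, similar in spirit to glibc's generic soft-float fraiseexcpt.

#include <fenv.h>
#include <float.h>

static volatile float vzero = 0.0f, vone = 1.0f;
static volatile float vmax = FLT_MAX, vmin = FLT_MIN;

void
force_exceptions (int excepts)
{
  volatile float r;
  if (excepts & FE_INVALID)
    r = vzero / vzero;  /* 0/0: invalid operation */
  if (excepts & FE_DIVBYZERO)
    r = vone / vzero;   /* 1/0: divide by zero */
  if (excepts & FE_OVERFLOW)
    r = vmax + vmax;    /* FLT_MAX + FLT_MAX: overflow */
  if (excepts & FE_UNDERFLOW)
    r = vmin * vmin;    /* FLT_MIN * FLT_MIN: underflow */
  if (excepts & FE_INEXACT)
    r = vmin - vone;    /* result must be rounded, so inexact */
  (void) r;
}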
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 9d34cd916..d02aa5754 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -43,16 +43,16 @@ ENTRY (__longjmp)
# endif
mtlr r6
cfi_same_value (lr)
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
mtlr r6
cfi_same_value (lr)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r5,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r5)
+ lis r5,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r5)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
index 96e50de37..27166c454 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
@@ -26,14 +26,14 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4);
+versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4);
# define __longjmp __vmx__longjmp
# include "__longjmp-common.S"
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
# define __NO_VMX__
# undef JB_SIZE
-symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0);
# undef __longjmp
# define __longjmp __novmx__longjmp
# include "__longjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
index 840891f1c..1da24f492 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
@@ -29,7 +29,7 @@ ENTRY(__copysign)
stwu r1,-16(r1)
cfi_adjust_cfa_offset (16)
stfd fp2,8(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
index 4ec8389b5..2ad6de273 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
@@ -30,7 +30,7 @@ ENTRY(__copysignl)
fmr fp0,fp1
fabs fp1,fp1
fcmpu cr7,fp0,fp1
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi cr6,r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
index 27881f8cc..249fda501 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
@@ -24,10 +24,10 @@ ENTRY (__lrint)
stwu r1,-16(r1)
fctiw fp13,fp1
stfd fp13,8(r1)
- nop /* Insure the following load is in a different dispatch group */
+ nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index 92dc3787d..6309f864b 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -67,7 +67,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1) /* Load return as integer. */
+ lwz r3,8+LOWORD(r1) /* Load return as integer. */
.Lout:
addi r1,r1,16
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index 2ed9ca7b4..8cff1563a 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 2
+ .align 3
.LC0: /* 2**23 */
.long 0x4b000000
.LC1: /* 0.5 */
@@ -60,7 +60,6 @@ ENTRY (__roundf )
#ifdef SHARED
lfs fp10,.LC1-.LC0(r9)
#else
- lis r9,.LC1@ha
lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index 46ea2b00f..f3244060e 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -94,14 +94,14 @@ ENTRY (__sigsetjmp)
# else
lwz r5,_rtld_global_ro@got(r5)
# endif
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r6,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r6)
+ lis r6,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r6)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
@@ -111,44 +111,43 @@ ENTRY (__sigsetjmp)
stw r0,((JB_VRSAVE)*4)(3)
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
index 60cd35052..92acff1e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
@@ -26,7 +26,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -36,7 +36,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/mcount.c b/libc/sysdeps/powerpc/powerpc32/mcount.c
index 0476bf61d..d8c063222 100644
--- a/libc/sysdeps/powerpc/powerpc32/mcount.c
+++ b/libc/sysdeps/powerpc/powerpc32/mcount.c
@@ -9,7 +9,7 @@
/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE,
but it should have been put in version GLIBC_2.15. Mark the
GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */
-default_symbol_version (___mcount_internal, __mcount_internal, GLIBC_2.16);
+versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16);
#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16)
strong_alias (___mcount_internal, ___mcount_internal_private);
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
index 55b2850fd..e7a88feb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
  nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
index cc80fcb02..da24ad38d 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
  nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index 631180f07..7246ca4d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -19,12 +19,10 @@
#include <sysdep.h>
#include <math_ldbl_opt.h>
- .section .rodata.cst12,"aM",@progbits,12
+ .section .rodata.cst8,"aM",@progbits,8
.align 3
- .LC0: /* 0x1.0000000000000p+52 == 2^52 */
- .long 0x43300000
- .long 0x00000000
- .long 0x3f000000 /* Use this for 0.5 */
+ .LC0: .long (52+127)<<23 /* 0x1p+52 */
+ .long (-1+127)<<23 /* 0.5 */
.section ".text"
@@ -57,12 +55,12 @@ ENTRY (__llround)
addi r9,r9,.LC0-got_label@l
mtlr r11
cfi_same_value (lr)
- lfd fp9,0(r9)
- lfs fp10,8(r9)
+ lfs fp9,0(r9)
+ lfs fp10,4(r9)
#else
lis r9,.LC0@ha
- lfd fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
- lfs fp10,.LC0@l+8(r9) /* Load 0.5 into fpr10. */
+ lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
+ lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */
#endif
fabs fp2,fp1 /* Get the absolute value of x. */
fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */
@@ -80,8 +78,8 @@ ENTRY (__llround)
nop
nop
nop
- lwz r4,12(r1) /* Load return as integer. */
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1) /* Load return as integer. */
+ lwz r4,8+LOWORD(r1)
.Lout:
addi r1,r1,16
blr
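
The new .LC0 constants exploit the IEEE 754 single-precision layout: a power of two with a zero mantissa is just its biased exponent shifted into bits 30-23, so (52+127)<<23 encodes 2^52 and (-1+127)<<23 encodes 0.5. The same construction in C, as a sketch assuming the usual 32-bit IEEE single format.

#include <stdint.h>
#include <string.h>

/* Build the float 2^n, for -126 <= n <= 127, from its bit pattern.  */
static float
pow2f (int n)
{
  uint32_t bits = (uint32_t) (n + 127) << 23;
  float f;
  memcpy (&f, &bits, sizeof f);  /* type-pun without aliasing UB */
  return f;
}
/* pow2f (52) == 0x1p52f and pow2f (-1) == 0.5f, matching .LC0 above.  */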
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
index 7d6c96e9e..4e42374ea 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
+++ b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
@@ -87,18 +87,15 @@ typedef unsigned long long int hp_timing_t;
#define HP_TIMING_NOW(Var) \
do { \
- union { long long ll; long ii[2]; } _var; \
- long tmp; \
- __asm__ __volatile__ ( \
- "1: mfspr %0,269;" \
- " mfspr %1,268;" \
- " mfspr %2,269;" \
- " cmpw %0,%2;" \
- " bne 1b;" \
- : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \
- : : "cr0" \
- ); \
- Var = _var.ll; \
+ unsigned int hi, lo, tmp; \
+ __asm__ __volatile__ ("1: mfspr %0,269;" \
+ " mfspr %1,268;" \
+ " mfspr %2,269;" \
+ " cmpw %0,%2;" \
+ " bne 1b;" \
+ : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \
+ : : "cr0"); \
+ Var = ((hp_timing_t) hi << 32) | lo; \
} while (0)
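
The rewritten macro samples the timebase by reading the upper half (SPR 269), the lower half (SPR 268), and the upper half again, retrying until the two upper reads agree, so a carry between the halves cannot produce a torn value. The same pattern in C for any 64-bit counter exposed as two 32-bit halves; read_hi and read_lo are hypothetical accessors.

#include <stdint.h>

extern uint32_t read_hi (void);  /* hypothetical: upper 32 bits */
extern uint32_t read_lo (void);  /* hypothetical: lower 32 bits */

static uint64_t
sample_counter64 (void)
{
  uint32_t hi, lo, hi2;
  do
    {
      hi = read_hi ();
      lo = read_lo ();
      hi2 = read_hi ();  /* re-read: detects a carry in between */
    }
  while (hi != hi2);
  return ((uint64_t) hi << 32) | lo;
}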
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
index 9a455a3c6..35e162667 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized memcmp implementation for PowerPC32.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,33 +36,32 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmplwi cr6, rN, 0
cmplwi cr1, rN, 12
- clrlwi. rTMP, rTMP, 30
- clrlwi rBITDIF, rSTR1, 30
- cmplwi cr5, rBITDIF, 0
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- stwu 1,-64(1)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -70,74 +70,95 @@ EALIGN (memcmp, 4, 0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
- .align 4
+ .align 4
L(samealignment):
clrrwi rSTR1, rSTR1, 2
clrrwi rSTR2, rSTR2, 2
beq cr5, L(Waligned)
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the word remainder */
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 4 */
- .align 3
+ .align 3
L(dsP1):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dPs2):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(dPs3):
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD2, r11
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD2, r11
- cmplw cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Waligned):
- andi. rBITDIF, rN, 12 /* Get the word remainder */
- srwi rTMP, rN, 4 /* Divide by 16 */
- cmplwi cr1, rBITDIF, 8
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dP4)
@@ -145,177 +166,352 @@ L(Waligned):
beq cr1, L(dP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
- .align 3
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ .align 3
L(dP1x):
slwi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 8 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
 /* Again we are on an early exit path (16-23 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr5, rWORD3, rWORD4
+#endif
+ cmplw cr1, rWORD3, rWORD4
slwi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 12 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 0(rSTR1)
lwz rWORD4, 0(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
 /* Again we are on an early exit path (24-31 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr5, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
slwi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- bne cr5, L(dLcr5)
- lwz 1,0(1)
+ bne cr7, L(dLcr7x)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -325,7 +521,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -334,69 +530,82 @@ L(d14):
slwi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
- blr
-
- .align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+ .align 4
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
li rRTN, 1
- lwz 1,0(1)
- bgtlr cr0
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -411,7 +620,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -419,11 +628,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmplw cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -432,7 +641,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -449,23 +658,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -480,36 +689,31 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
-
- .align 4
-L(zeroLengthReturn):
-
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+   of r12 to 0. If r12 == 0 then rSTR1 is word aligned and can
perform the Wunaligned loop.
   Otherwise we know that rSTR1 is not yet word aligned.
@@ -518,79 +722,88 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
   normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
   correct and the first W (shifted) is in the expected register pair. */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
clrlwi rSHL, rSTR2, 30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
beq cr5, L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL, r27, 30
+ clrlwi rSHL, rWORD8_SHIFT, 30
clrrwi rSTR1, rSTR1, 2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
slwi rSHL, rSHL, 3
- cmplw cr5, r27, rSTR2
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
subfic rSHR, rSHL, 32
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the W remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD8, 0(rSTR2)
- la rSTR2, 4(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
slw rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
- srw rG, rWORD2, rSHR
+ srw r12, rWORD2, rSHR
clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dusP1):
- slw rB, rWORD2, rSHL
- slw rWORD7, rWORD1, r11
- slw rWORD8, rWORD8, r11
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
@@ -600,95 +813,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duPs2):
- slw rH, rWORD2, rSHL
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD8, r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duPs3):
- slw rF, rWORD2, rSHL
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD8, r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- slw rD, rWORD2, rSHL
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP, rN, 4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF, rN, 12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD6, 0(rSTR2)
lwzu rWORD8, 4(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
subfic rSHR, rSHL, 32
- slw rH, rWORD6, rSHL
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(duP1):
- srw rG, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD7, 0(rSTR1)
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -698,186 +949,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
- ld rWORD2, 8(rSTR2)
- srw rA, rWORD2, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP2):
- srw rE, rWORD8, rSHR
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- slw rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duP3):
- srw rC, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD3, 0(rSTR1)
- slw rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srw rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD1, 0(rSTR1)
- slw rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmplw cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -887,95 +1273,101 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
   the right shift count (rSHR).  If the bit count is less than or equal to it,
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
- .align 4
+#endif
+ srw r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
lwz rWORD1, 4(rSTR1)
- lwz r31,48(1)
+#endif
+ lwz rWORD8, 48(r1)
subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
- or rWORD2, rA, rB
- lwz r30,44(1)
- lwz r29,40(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
- b L(dureturn26)
- .align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
+ b L(dureturn26)
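
For reference, the L(dutrim) sequence just above reduces to a few lines of
C.  This is a minimal sketch of the logic, not code from the patch; the
function name and parameters are illustrative.  It assumes both words
already hold their bytes in big-endian significance order, which the lwbrx
loads guarantee on little-endian:

#include <stdint.h>

/* Compare the final 1-3 bytes of two words, given the remainder
   length in bytes (1 <= rem <= 3).  */
static int
trim_compare (uint32_t w1, uint32_t w2, unsigned int rem)
{
  unsigned int shift = 32 - rem * 8;  /* Shift count is 32 - (rN * 8).  */
  w1 >>= shift;                       /* Discard bytes beyond the length.  */
  w2 >>= shift;
  /* After shifting right by at least 8 bits both values fit in 24 bits,
     so the subtraction cannot overflow and its sign is the result,
     exactly like "sub rRTN, rWORD1, rWORD2" above.  */
  return (int) w1 - (int) w2;
}
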
+ .align 4
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
index d9146631e..338d3cce3 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
@@ -203,15 +203,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -339,13 +352,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two words to copy, so copy them */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 6,8(5) /* load the 3rd src word */
stw 0,0(4) /* store the 1st dst word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10 /* now left align 2nd src word into R0 */
srw 8,6,9 /* shift 3rd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 7,12(5)
stw 0,4(4) /* store the 2nd dst word */
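
The guarded slw/srw pairs in this hunk implement the usual shift-and-merge
trick for copying across a word-misaligned source; on little-endian the two
shift directions simply swap.  A hedged C sketch of one merge step follows
(the helper name is invented, and the __LITTLE_ENDIAN__ test mirrors the
assembly's conditional):

#include <stdint.h>

/* Combine two aligned source words into one aligned destination word
   for a source misaligned by k bytes (1 <= k <= 3).  sh1 and sh2 play
   the role of the shift counts kept in r10 and r9.  */
static uint32_t
merge_words (uint32_t w0, uint32_t w1, unsigned int k)
{
  unsigned int sh1 = k * 8;
  unsigned int sh2 = 32 - sh1;
#ifdef __LITTLE_ENDIAN__
  return (w0 >> sh1) | (w1 << sh2);   /* srw then slw.  */
#else
  return (w0 << sh1) | (w1 >> sh2);   /* slw then srw.  */
#endif
}
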
@@ -353,8 +376,13 @@ EALIGN (memcpy, 5, 0)
addi 5,5,16
bf 31,4f
/* there is a third word to copy, so copy it */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 3rd src word to left align it in R0 */
srw 8,7,9 /* shift 4th src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
stw 0,0(4) /* store 3rd dst word */
mr 6,7
@@ -364,8 +392,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
addi 5,5,8
or 0,0,8 /* or them to get word to store */
bf 31,4f
@@ -378,23 +411,43 @@ EALIGN (memcpy, 5, 0)
.align 4
4:
/* copy 16 bytes at a time */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,0(5)
stw 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,4(5)
stw 0,4(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,8(5)
stw 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,12(5)
stw 0,12(4)
@@ -403,8 +456,13 @@ EALIGN (memcpy, 5, 0)
bdnz+ 4b
8:
/* calculate and store the final word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
stw 0,0(4)
3:
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memset.S b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
index c2d288b38..4fd9d8cb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -50,7 +50,7 @@ L(_memset):
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -65,7 +65,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
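
The insrdi forms above splat the fill byte exactly as the earlier rlwimi
pair did.  In C the replication is two shift-and-or steps (a sketch; the
helper name is made up):

#include <stdint.h>

/* Replicate the low byte of c across a 32-bit word.  */
static uint32_t
splat_byte (uint32_t c)
{
  c &= 0xff;
  c |= c << 8;    /* Replicate byte to halfword.  */
  c |= c << 16;   /* Replicate halfword to word.  */
  return c;
}
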
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
index 724d9084a..89b961e78 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,6 +37,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -75,12 +76,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -88,28 +122,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
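
To make the little-endian L(endstring) above easier to follow, here is a
hedged C sketch of its semantics (not an instruction-for-instruction
transcription; the names are illustrative).  'carry' stands for rTMP, the
word produced by the zero-byte test, whose lowest set bit flags the first
NUL byte of s1; the rlwinm/rldimi/rlwimi triple is a 32-bit byte swap:

#include <stdint.h>

static int
le_endstring (uint32_t w1, uint32_t w2, uint32_t carry)
{
  uint32_t mask, b1, b2;

  carry <<= 1;                   /* slwi rTMP, rTMP, 1 */
  mask = (carry - 1) & ~carry;   /* addi/andc: bytes up to the NUL.  */
  w1 &= mask;                    /* Mask off gunk past the terminator.  */
  w2 &= mask;
  b1 = __builtin_bswap32 (w1);   /* Back to big-endian byte order.  */
  b2 = __builtin_bswap32 (w2);
  if (b1 == b2)
    return 0;
  return b1 > b2 ? 1 : -1;       /* Unsigned byte-lexicographic order.  */
}
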
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
index ecd37c3cd..49c8a0866 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
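
HIWORD and LOWORD, used in the loads above, are the endianness-dependent
byte offsets of the high and low 32-bit halves of a double stored to
memory (0 and 4 on big-endian, 4 and 0 on little-endian).  A C sketch of
the same split, with invented names:

#include <stdint.h>
#include <string.h>

/* Fetch the high (sign/exponent) and low halves of a double; the
   effect of the lwz r3,8+HIWORD(r1) / lwz r4,8+LOWORD(r1) pair.  */
static void
split_double (double x, uint32_t *hi, uint32_t *lo)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);   /* The stfd to the stack.  */
  *hi = (uint32_t) (bits >> 32);     /* Word at offset HIWORD.  */
  *lo = (uint32_t) bits;             /* Word at offset LOWORD.  */
}
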
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
index d4da625bb..780dd9ca4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
@@ -38,7 +38,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
index f2417fdf4..5f7ba43a2 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
index 2c095db1d..3ea18589c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
index 3344b312e..c0660cf6e 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
	/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
index 7f64f8d12..ce298905c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
	/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
index 0ff04cb71..abb0840d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
	/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
index a76f71e04..f58114a0c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -219,15 +219,28 @@ L(word_unaligned_short):
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -577,7 +590,11 @@ L(wdu1_32):
lwz 6,-1(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,8
+#else
slwi 6,6,8
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu1_32tail)
mtctr 8
@@ -585,8 +602,12 @@ L(wdu1_32):
lwz 8,3(4)
lwz 7,4(4)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu1_loop32x)
.align 4
L(wdu1_loop32):
@@ -595,8 +616,12 @@ L(wdu1_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
L(wdu1_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -613,7 +638,11 @@ L(wdu1_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,8
+#else
slwi 6,8,8
+#endif
bdnz+ L(wdu1_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -624,8 +653,12 @@ L(wdu1_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,3(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
+/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu_32tailx)
L(wdu2_32):
@@ -633,7 +666,11 @@ L(wdu2_32):
lwz 6,-2(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,16
+#else
slwi 6,6,16
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu2_32tail)
mtctr 8
@@ -641,8 +678,11 @@ L(wdu2_32):
lwz 8,2(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu2_loop32x)
.align 4
L(wdu2_loop32):
@@ -651,8 +691,11 @@ L(wdu2_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
L(wdu2_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -670,7 +713,11 @@ L(wdu2_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,16
+#else
slwi 6,8,16
+#endif
bdnz+ L(wdu2_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -681,8 +728,11 @@ L(wdu2_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,2(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu_32tailx)
L(wdu3_32):
@@ -690,7 +740,11 @@ L(wdu3_32):
lwz 6,-3(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,24
+#else
slwi 6,6,24
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu3_32tail)
mtctr 8
@@ -698,8 +752,11 @@ L(wdu3_32):
lwz 8,1(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu3_loop32x)
.align 4
L(wdu3_loop32):
@@ -708,8 +765,11 @@ L(wdu3_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
L(wdu3_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -726,7 +786,11 @@ L(wdu3_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,24
+#else
slwi 6,8,24
+#endif
bdnz+ L(wdu3_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -737,8 +801,11 @@ L(wdu3_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,1(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu_32tailx)
.align 4
L(wdu_32tailx):
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memset.S b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
index 8c23c8d13..a4b002a96 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -48,7 +48,7 @@ L(_memset):
ble- cr1, L(small)
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -64,7 +64,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
index b2ab5bfe7..095c15547 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -54,9 +54,8 @@ ENTRY (__finite)
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r0,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r0,r0,17 /* r0 = abs(r0). */
addi r1,r1,16 /* Reset the stack pointer. */
	cmpwi	cr7,r0,0x7ff0	/* r0 == 0x7ff0?  */
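
HISHORT is likewise the offset of the 16-bit chunk holding the sign and
biased exponent.  A sketch of the extraction and the finiteness test it
feeds (the helper is illustrative, not the library's code):

#include <stdint.h>
#include <string.h>

static int
is_finite (double x)
{
  uint64_t bits;
  uint32_t top;

  memcpy (&bits, &x, sizeof bits);
  top = (uint32_t) (bits >> 48);   /* The lhz r0,8+HISHORT(r1) load.  */
  top &= 0x7fff;                   /* clrlwi r0,r0,17: drop the sign.  */
  return top < 0x7ff0;             /* All-ones exponent is Inf or NaN.  */
}
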
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
index 3f8af60a5..0101c8fa1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -48,14 +48,13 @@ ENTRY (__isinf)
li r3,0
bflr 29 /* If not INF, return. */
- /* Either we have -INF/+INF or a denormal. */
+ /* Either we have +INF or -INF. */
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
index 99ff12696..0ad1dcf1f 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -53,8 +53,8 @@ ENTRY (__isnan)
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lwz r4,8(r1) /* Load the upper half of the FP value. */
- lwz r5,12(r1) /* Load the lower half of the FP value. */
+ lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */
+ lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */
addi r1,r1,16 /* Reset the stack pointer. */
lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
clrlwi r4,r4,1 /* r4 = abs(r4). */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
index e008ed0c3..1c82577f5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
@@ -35,14 +35,14 @@ static const union {
long double
__logbl (long double x)
{
- double xh, xl;
+ double xh;
double ret;
if (__builtin_expect (x == 0.0L, 0))
/* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */
return -1.0L / __builtin_fabsl (x);
- ldbl_unpack (x, &xh, &xl);
+ xh = ldbl_high (x);
/* ret = x & 0x7ff0000000000000; */
asm (
"xxland %x0,%x1,%x2\n"
@@ -58,9 +58,9 @@ __logbl (long double x)
{
	/* POSIX specifies that a denormal number is treated as
though it were normalized. */
- int64_t lx, hx;
+ int64_t hx;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ EXTRACT_WORDS64 (hx, xh);
return (long double) (-1023 - (__builtin_clzll (hx) - 12));
}
/* Test to avoid logb_downward (0.0) == -0.0. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
index 369e5e048..85754f3f1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -25,107 +25,111 @@ ENTRY (__memchr)
CALL_MCOUNT
dcbt 0,r3
clrrwi r8,r3,2
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmplwi r5,16
+ li r9, -1
+ rlwinm r6,r3,3,27,28 /* Calculate padding. */
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ slw r9,r9,r6
+#else
+ srw r9,r9,r6
+#endif
ble L(small_range)
- cmplw cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7), the size
- passed in is zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-L(proceed):
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- cmpli cr6,r6,0 /* cr6 == Do we have padding? */
lwz r12,0(r8) /* Load word from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in WORD1. */
- beq cr6,L(proceed_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(proceed_no_padding):
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+ cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */
+ and r3,r3,r9
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 29,L(loop_setup)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srwi r6,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+	 containing the last byte of the string, i.e. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+	 r8 + 4; we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r6,r7,r8
+ srwi r6,r6,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+	   it's a small loop (8 instructions), align it to 32 bytes.  */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
lwz r12,4(r8)
lwzu r11,8(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
- cmplwi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one word. */
+ cmplwi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,4
- cmplw cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more dword to read. */
+ cmplw r8,r7
+ beqlr
+
+ lwzu r12,4(r8)
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ bne cr6,L(done)
+ blr
+ .align 4
+L(found):
/* OK, one (or both) of the words contains BYTE. Check
the first word and decrement the address in case the first
word really contains BYTE. */
- .align 4
-L(found):
- cmplwi cr6,r10,0
+ cmplwi cr6,r3,0
addi r8,r8,-4
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntw r0,r0 /* Count trailing zeros. */
+#else
+ cntlzw r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmplw r8,r7 /* Are we on the last word? */
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmplw r3,r7
- bge L(null)
+ cmplw cr7,r0,r5 /* If on the last dword, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
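
The cmpb result in r3 holds 0xff in every byte position that matched.
L(done) converts that mask to a byte offset: count leading zeros on
big-endian, or trailing zeros on little-endian (the addi/andc/popcntw
idiom, since there is no trailing-zero-count instruction here), then
divide by eight.  A sketch with an invented name; the mask must be
nonzero:

#include <stdint.h>

static unsigned int
first_match_offset (uint32_t mask)
{
#ifdef __LITTLE_ENDIAN__
  /* addi/andc/popcntw: isolate the bits below the lowest set bit and
     count them, i.e. a trailing-zero count.  */
  return (unsigned int) __builtin_popcount ((mask - 1) & ~mask) >> 3;
#else
  /* cntlzw: the lowest-addressed byte is the most significant.  */
  return (unsigned int) __builtin_clz (mask) >> 3;
#endif
}
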
.align 4
@@ -137,67 +141,42 @@ L(null):
.align 4
L(small_range):
cmplwi r5,0
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmplwi r5,0 above */
+ beq L(null)
lwz r12,0(r8) /* Load word from memory. */
- cmplwi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- beq cr6,L(small_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(small_no_padding):
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplw r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,4
- cmplw r9,r7
- bge L(null)
-
-L(loop_small): /* loop_small has been unrolled. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
bne cr6,L(done)
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover word that still needs to be
- checked. */
- lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- bne cr6,L(done)
-
- /* save a branch and exit directly */
- li r3,0
blr
END (__memchr)
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
index 075e19f14..f160ddebf 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,35 +36,32 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmplwi cr6,rN,0
- cmplwi cr1,rN,12
- clrlwi. rTMP,rTMP,30
- clrlwi rBITDIF,rSTR1,30
- cmplwi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
-
- /* If less than 8 bytes or not aligned, use the unaligned
- byte loop. */
-
- blt cr1,L(bytealigned)
- stwu 1,-64(1)
+ xor r0, rSTR2, rSTR1
+ cmplwi cr6, rN, 0
+ cmplwi cr1, rN, 12
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -74,332 +70,541 @@ EALIGN (memcmp,4,0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrwi rSTR1,rSTR1,2
- clrrwi rSTR2,rSTR2,2
- beq cr5,L(Waligned)
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the word remainder */
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ clrrwi rSTR1, rSTR1, 2
+ clrrwi rSTR2, rSTR2, 2
+ beq cr5, L(Waligned)
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 4 */
.align 3
L(dsP1):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
.align 4
L(dPs2):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
.align 4
L(dPs3):
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD2,r11
- cmplw cr1,rWORD3,rWORD4
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
+ cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD2,r11
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Waligned):
- andi. rBITDIF,rN,12 /* Get the word remainder */
- srwi rTMP,rN,4 /* Divide by 16 */
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 4 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
+ bne cr6, L(dLcr6)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
.align 3
L(dP1x):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 8 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
L(dP2e):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr5,rWORD3,rWORD4
- slwi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ slwi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 12 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP
- lwz rWORD3,0(rSTR1)
- lwz rWORD4,0(rSTR2)
- cmplw cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+ lwz rWORD4, 0(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
L(dP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD1,rWORD2
- slwi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- bne cr5,L(dLcr5)
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ slwi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ bne cr7, L(dLcr7x)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmplw cr5,rWORD7,rWORD8
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ sub rRTN, rWORD1, rWORD2
blr
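
A note on the lwbrx loads used throughout these little-endian paths:
byte-reversing each word as it is loaded puts the first string byte in
the most significant register byte on either endianness, so an unsigned
word compare (or, for the trimmed final word in L(d00) above, a plain
subtract) orders the strings byte-lexicographically.  A sketch of the
load convention (illustrative only):

#include <stdint.h>

/* Load a word so the lowest-addressed byte is the most significant:
   lwz on big-endian, lwbrx on little-endian.  */
static uint32_t
load_be_order (const uint32_t *p)
{
#ifdef __LITTLE_ENDIAN__
  return __builtin_bswap32 (*p);
#else
  return *p;
#endif
}
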
.align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
- bgtlr cr0
- li rRTN,-1
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
- li rRTN,1
- lwz 1,0(1)
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
mtctr rN
/* We need to prime this loop. This loop is swing modulo scheduled
@@ -411,38 +616,39 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmplw cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmplw cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmplw cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
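
The byte loop is primed so each pass of L(bLoop) starts the loads for one
byte pair while branching on the compare of an earlier pair; three
register pairs and three condition fields rotate through the loop.  The
scheduling itself does not translate to C, but the load-ahead idea does.
A rough sketch with invented names (requires n >= 1; n == 0 is handled
before this point):

#include <stddef.h>

static int
bytewise_cmp (const unsigned char *s1, const unsigned char *s2, size_t n)
{
  unsigned char a = *s1++, b = *s2++;          /* Prime the first pair.  */
  while (--n)
    {
      unsigned char a2 = *s1++, b2 = *s2++;    /* Load pair i ...  */
      if (a != b)                              /* ... test pair i-1.  */
        return a - b;
      a = a2;
      b = b2;
    }
  return a - b;
}
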
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmplw cr6,rWORD5,rWORD6
+ cmplw cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmplw cr0,rWORD1,rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmplw cr1,rWORD3,rWORD4
+ cmplw cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -452,67 +658,62 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
-
.align 4
-L(zeroLengthReturn):
-
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+   of r12 to 0.  If r12 == 0 then rSTR1 is word aligned and we can
perform the Wunaligned loop.
   Otherwise we know that rSTR1 is not yet word aligned.
@@ -521,465 +722,654 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
   normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
   correct and the first W (shifted) is in the expected register pair.  */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
- clrlwi rSHL,rSTR2,30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
- beq cr5,L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
+ clrlwi rSHL, rSTR2, 30
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
+ beq cr5, L(Wunaligned)
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL,r27,30
- clrrwi rSTR1,rSTR1,2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- slwi rSHL,rSHL,3
- cmplw cr5,r27,rSTR2
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
- subfic rSHR,rSHL,32
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the W remainder */
+ clrlwi rSHL, rWORD8_SHIFT, 30
+ clrrwi rSTR1, rSTR1, 2
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ slwi rSHL, rSHL, 3
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ subfic rSHR, rSHL, 32
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- lwz rWORD8,0(rSTR2)
- la rSTR2,4(rSTR2)
- slw rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
+ slw rWORD8, rWORD8, rSHL
L(dus0):
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- srw rG,rWORD2,rSHR
- clrlwi rN,rN,30
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ srw r12, rWORD2, rSHR
+ clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 4 */
.align 4
L(dusP1):
- slw rB,rWORD2,rSHL
- slw rWORD7,rWORD1,r11
- slw rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duPs2):
- slw rH,rWORD2,rSHL
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD8,r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
.align 4
L(duPs3):
- slw rF,rWORD2,rSHL
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD8,r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- slw rD,rWORD2,rSHL
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP,rN,4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF,rN,12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(24-64))
- slwi rSHL,rSHL,3
- lwz rWORD6,0(rSTR2)
- lwzu rWORD8,4(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
- subfic rSHR,rSHL,32
- slw rH,rWORD6,rSHL
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD6, 0(rSTR2)
+ lwzu rWORD8, 4(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ subfic rSHR, rSHL, 32
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 4 */
.align 4
L(duP1):
- srw rG,rWORD8,rSHR
- lwz rWORD7,0(rSTR1)
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD7, 0(rSTR1)
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmplw cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duP2):
- srw rE,rWORD8,rSHR
- lwz rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- slw rH,rWORD8,rSHL
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- cmplw cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmplw cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
.align 4
L(duP3):
- srw rC,rWORD8,rSHR
- lwz rWORD3,0(rSTR1)
- slw rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(duLcr1)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srw rA,rWORD8,rSHR
- lwz rWORD1,0(rSTR1)
- slw rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmplw cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmplw cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmplw cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
.align 4
L(dutrim):
- lwz rWORD1,4(rSTR1)
- lwz r31,48(1)
- subfic rN,rN,32 /* Shift count is 32 - (rN * 8). */
- or rWORD2,rA,rB
- lwz r30,44(1)
- lwz r29,40(r1)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
+ lwz rWORD1, 4(rSTR1)
+#endif
+ lwz rWORD8, 48(r1)
+ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
b L(dureturn26)
.align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
+
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
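
The unaligned path in the memcmp diff above synthesizes each word of rSTR2 from two aligned loads, using the rSHL/rSHR counts derived from the misalignment. A rough C analogue of that merge follows; the function and parameter names are illustrative, and 0 < shl < 32 is assumed, since the fully aligned case never reaches this path in the assembly:

  #include <stdint.h>

  /* Illustrative sketch, not part of the patch: build the word that starts
     shl/8 bytes into 'prev', in the big-endian register view the compares
     operate on (the LE variant gets the same view via lwbrx).  */
  static inline uint32_t
  merge_unaligned_word (uint32_t prev, uint32_t curr, unsigned shl)
  {
    unsigned shr = 32 - shl;        /* rSHR = 32 - rSHL, 0 < shl < 32 */
    return (prev << shl) | (curr >> shr);
  }
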
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 7f0077823..acf3c1019 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -383,7 +383,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -435,13 +435,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32 bytes due to the loop. */
lvx 4,12,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -461,11 +469,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
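
On little-endian the lvsl/vperm pair becomes lvsr with swapped vperm inputs, because both the permute control and the lane order are mirrored. A byte-level scalar sketch of what the merge computes (names are illustrative, not from the patch):

  #include <stdint.h>

  /* Illustrative sketch: combine two aligned 16-byte blocks into the 16
     bytes that start 'off' bytes into the first block, which is the job
     vperm does with a control vector from lvsl (BE) or lvsr (LE).  */
  static void
  merge_blocks (uint8_t out[16], const uint8_t lo[16], const uint8_t hi[16],
                unsigned off)        /* off = src & 15 */
  {
    for (unsigned i = 0; i < 16; i++)
      out[i] = (off + i < 16) ? lo[off + i] : hi[off + i - 16];
  }
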
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
index 5ad4edb58..4610ec5b5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
@@ -325,7 +325,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -377,13 +377,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32 bytes due to the loop. */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -403,11 +411,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
index defd832b0..9601aa799 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
@@ -23,117 +23,131 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrwi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
li r6,-4
- addi r9,r3,-1
- clrrwi r8,r9,2
- addi r8,r8,4
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,27,28 /* Calculate padding. */
-
+ clrrwi r8,r7,2
+ srw r9,r9,r0
cmplwi r5,16
+ clrrwi r0,r10,2
ble L(small_range)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-4
- cmplw cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 29,L(loop_setup)
+ bf 29,L(loop_setup)
/* Handle WORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,r8,r6
+#else
lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already? */
+#endif
addi r8,r8,-4
- cmplw cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-8
- sub r5,r8,r7
- srwi r9,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+ containing the first byte of the string, ie. the word at
+ s & ~3, or r0. The first word read is at r8 - 4, we
+ read 2 * cnt words, so the last word read will be at
+ r8 - 4 - 8 * cnt + 4. Solving for cnt gives
+ cnt = (r8 - r0) / 8 */
+ sub r5,r8,r0
+ addi r8,r8,-4
+ srwi r9,r5,3 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- lwbrx r12,r8,r6
- lwbrx r11,r8,r0
- addi r8,r8,-4
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+ lwzx r11,r8,r6
+#else
+ lwbrx r12,0,r8
+ lwbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
+ or r5,r9,r3 /* Merge everything in one word. */
cmplwi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-4
+ addi r8,r8,-8
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,4
- cmplw cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+ /* We may have one more word to read. */
+ cmplw r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmplwi cr6,r10,0
- addi r8,r8,4
+ /* OK, one (or both) of the words contains BYTE. Check
+ the first word. */
+ cmplwi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzw r9,r3 /* Count leading zeros before the match. */
+ cmplw r8,r0 /* Are we on the last word? */
+ srwi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-3
sub r3,r8,r0
- cmplw r3,r7
- blt L(null)
+ cmplw cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -147,28 +161,35 @@ L(small_range):
cmplwi r5,0
beq L(null)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for null bytes in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
bne cr7,L(done)
+ /* Are we done already? */
+ cmplw r8,r0
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplw r8,r0
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
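
The rewritten word-at-a-time loop above is easiest to audit against the plain byte-level semantics of memrchr. For reference, those semantics are (standard behavior, not code from the patch):

  #include <stddef.h>

  /* Reference semantics: last occurrence of c in the first n bytes of s,
     or NULL.  The assembly computes exactly this, a word at a time.  */
  static void *
  memrchr_ref (const void *s, int c, size_t n)
  {
    const unsigned char *p = (const unsigned char *) s + n;
    while (n--)
      if (*--p == (unsigned char) c)
        return (void *) p;
    return NULL;
  }
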
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memset.S b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
index 360ea717f..aadda2558 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
@@ -35,8 +35,8 @@ L(_memset):
cfi_offset(31,-8)
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
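
The insrdi pair replicates the fill byte across the register (the diff simply swaps in the 64-bit insert form of the same idiom). The replication itself, sketched in C for the 32-bit case:

  #include <stdint.h>

  /* Illustrative sketch of byte replication: 0x2a -> 0x2a2a2a2a.  */
  static inline uint32_t
  replicate_byte (unsigned char c)
  {
    uint32_t w = c;
    w |= w << 8;    /* c    -> cc   */
    w |= w << 16;   /* cc   -> cccc */
    return w;
  }
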
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
index a80c74a09..c2d8c4b7b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
@@ -27,16 +27,21 @@ ENTRY (__rawmemchr)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
/* Now r4 has a word of c bytes. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6 /* Move left to discard ignored bits. */
srw r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -90,8 +95,14 @@ L(loop):
word from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
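
The little-endian L(done) sequence above (addi/andc/popcntw) is a count-trailing-zeros idiom: (x - 1) & ~x turns the zeros below the lowest set bit into ones, and the popcount of that is the trailing-zero count. A C sketch using the GCC builtin; the name is illustrative, and the mask is assumed to come from a cmpb-style compare, 0xff per matching byte:

  #include <stdint.h>

  /* Illustrative sketch: byte index (in memory order) of the first match
     in a nonzero cmpb-style mask.  */
  static inline unsigned
  first_match_index (uint32_t mask)
  {
    uint32_t below = (mask - 1) & ~mask;   /* ones below lowest set bit */
    return (unsigned) __builtin_popcount (below) >> 3;
  }
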
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
index 0ecadb271..b66265967 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
/* Now r4 has a word of c bytes and r0 has
a word of null bytes. */
@@ -46,11 +46,17 @@ ENTRY (strchr)
/* Move the words left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r11,r11,r6
+ slw r10,r10,r6
+ slw r11,r11,r6
+#else
slw r10,r10,r6
slw r11,r11,r6
srw r10,r10,r6
srw r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -65,7 +71,7 @@ ENTRY (strchr)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
+ cmpb r10,r12,r4
cmpb r11,r12,r0
or r5,r10,r11
cmpwi cr7,r5,0
@@ -100,22 +106,31 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
+ again and move the result of cmpb to r10/r11 so we can calculate
+ the pointer. */
mr r10,r6
mr r11,r7
addi r8,r8,4
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
- cntlzw r4,r10 /* Count leading zeroes before c matches. */
- cntlzw r0,r11 /* Count leading zeroes before null matches. */
- cmplw cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntw r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmplw cr7,r3,r4
+ bgt cr7,L(no_match)
+#else
+ cntlzw r0,r10 /* Count leading zeros before c matches. */
+ cmplw cr7,r11,r10
bgt cr7,L(no_match)
- srwi r0,r4,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -133,10 +148,14 @@ L(null_match):
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
/* Move the words left and right to discard the bits that are
- not part of the string and to bring them back as zeros. */
-
+ not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6
srw r5,r5,r6
+#endif
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -191,7 +210,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
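
In the little-endian L(done) above, two cmpb masks compete: one for c, one for the terminator. Comparing their isolated below-lowest-bit masks decides which byte comes first in memory, with an all-ones value acting as infinity when there was no c match. A C sketch under the same assumptions (names illustrative):

  #include <stdint.h>

  /* Illustrative sketch: nonzero bytes in each mask mark matches; the
     mask whose lowest set bit is lower marks the earlier byte.  If
     c_mask is 0, (c_mask - 1) & ~c_mask is all ones and compares
     greater, so the terminator wins, as in the bgt to L(no_match).  */
  static inline int
  c_before_nul (uint32_t c_mask, uint32_t nul_mask)
  {
    uint32_t c_below = (c_mask - 1) & ~c_mask;
    uint32_t n_below = (nul_mask - 1) & ~nul_mask;
    return c_below <= n_below;
  }
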
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
index d4cacab60..f5d24d434 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
@@ -43,10 +43,17 @@ ENTRY (__strchrnul)
/* Move the words left and right to discard the bits that are
not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r9,r9,r6
+ slw r10,r10,r6
+ slw r9,r9,r6
+#else
slw r10,r10,r6
slw r9,r9,r6
srw r10,r10,r6
srw r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -54,7 +61,7 @@ ENTRY (__strchrnul)
mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
+ /* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop)
@@ -76,7 +83,7 @@ L(loop):
single register for speed. This is an attempt
to speed up the null-checking process for bigger strings. */
lwz r12,4(r8)
- lwzu r11,8(r8)
+ lwzu r11,8(r8)
cmpb r10,r12,r0
cmpb r9,r12,r4
cmpb r6,r11,r0
@@ -95,9 +102,9 @@ L(loop):
addi r8,r8,-4
bne cr6,L(done)
- /* The c/null byte must be in the second word. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
+ /* The c/null byte must be in the second word. Adjust the address
+ again and move the result of cmpb to r5 so we can calculate the
+ pointer. */
mr r5,r10
addi r8,r8,4
@@ -105,7 +112,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
index b71a10f5c..b08d6c028 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
@@ -29,7 +29,11 @@ ENTRY (strlen)
li r0,0 /* Word with null chars to use with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
lwz r12,0(r4) /* Load word from memory. */
+#ifdef __LITTLE_ENDIAN__
+ slw r5,r5,r6
+#else
srw r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in WORD1. */
cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -47,9 +51,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpwi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -86,9 +87,15 @@ L(loop):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the length. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntw r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
index fdae44d26..10c9d251b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -26,7 +26,7 @@
EALIGN (strncmp,5,0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -39,6 +39,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -78,13 +79,45 @@ L(g1): add rTMP,rFEFE,rWORD1
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzw rBITDIF,rBITDIF
@@ -92,28 +125,20 @@ L(endstring):
addi rNEG,rNEG,7
cmpw cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr cr1
L(equal):
li rRTN,0
blr
L(different):
- lwzu rWORD1,-4(rSTR1)
+ lwz rWORD1,-4(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr
L(highbit):
- srwi rWORD2,rWORD2,24
- srwi rWORD1,rWORD1,24
- sub rRTN,rWORD1,rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
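
The little-endian L(endstring)/L(different) blocks above end each word with a rlwinm/rldimi/rlwimi triple; that sequence is an in-register 32-bit byte reverse, after which an ordinary unsigned compare yields memcmp-style ordering. A C sketch using the GCC byte-swap builtin (illustrative, not the patch's code):

  #include <stdint.h>

  /* Illustrative sketch: once both little-endian words are byte-reversed,
     unsigned comparison matches byte-by-byte string order.  */
  static inline int
  word_order_le (uint32_t w1, uint32_t w2)
  {
    uint32_t a = __builtin_bswap32 (w1);
    uint32_t b = __builtin_bswap32 (w2);
    return (a > b) - (a < b);      /* 1, 0, or -1 */
  }
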
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
index ed088366a..eb52afd1a 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
@@ -28,51 +28,47 @@ ENTRY (__strnlen)
add r7,r3,r4 /* Calculate the last acceptable address. */
cmplwi r4,16
li r0,0 /* Word with null chars. */
+ addi r7,r7,-1
ble L(small_range)
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrwi r7,r7,2 /* Address of last word. */
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop_setup)
- /* Handle DWORD2 of pair. */
+ /* Handle WORD2 of pair. */
lwzu r12,4(r8)
cmpb r10,r12,r0
cmplwi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, ie. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r5,r7,r8
srwi r6,r5,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -88,15 +84,18 @@ L(loop):
cmplwi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,4
- cmplw cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more word to read. */
+ cmplw cr6,r8,r7
+ beq cr6,L(end_max)
+
+ lwzu r12,4(r8)
+ cmpb r10,r12,r0
+ cmplwi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the words contains a null byte. Check
@@ -121,49 +120,56 @@ L(found):
We need to make sure the null char is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmplw r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntw r0,r0
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmplw r3,r4
+ blelr
+ mr r3,r4
blr
-/* Deals with size <= 32. */
+/* Deals with size <= 16. */
.align 4
L(small_range):
cmplwi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrwi r7,r7,2 /* Address of last word. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,4
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
lwzu r12,4(r8)
cmpb r10,r12,r0
- addi r9,r8,4
cmplwi cr6,r10,0
bne cr6,L(done)
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
index 60b0026fa..3fb65b5f7 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
@@ -24,6 +24,11 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define SAVE_GP(N) evstdd r##N,((JB_FPRS+((N)-14)*2)*4)(3)
+#else
+# define SAVE_GP(N) stw r##N,((JB_GPRS+(N)-14)*4)(3)
+#endif
ENTRY (__sigsetjmp)
@@ -35,31 +40,31 @@ ENTRY (__sigsetjmp)
stw r1,(JB_GPR1*4)(3)
#endif
mflr r0
- stw r14,((JB_GPRS+0)*4)(3)
+ SAVE_GP (14)
#ifdef PTR_MANGLE
PTR_MANGLE2 (r0, r10)
li r10,0
#endif
stw r0,(JB_LR*4)(3)
- stw r15,((JB_GPRS+1)*4)(3)
+ SAVE_GP (15)
mfcr r0
- stw r16,((JB_GPRS+2)*4)(3)
+ SAVE_GP (16)
stw r0,(JB_CR*4)(3)
- stw r17,((JB_GPRS+3)*4)(3)
- stw r18,((JB_GPRS+4)*4)(3)
- stw r19,((JB_GPRS+5)*4)(3)
- stw r20,((JB_GPRS+6)*4)(3)
- stw r21,((JB_GPRS+7)*4)(3)
- stw r22,((JB_GPRS+8)*4)(3)
- stw r23,((JB_GPRS+9)*4)(3)
- stw r24,((JB_GPRS+10)*4)(3)
- stw r25,((JB_GPRS+11)*4)(3)
- stw r26,((JB_GPRS+12)*4)(3)
- stw r27,((JB_GPRS+13)*4)(3)
- stw r28,((JB_GPRS+14)*4)(3)
- stw r29,((JB_GPRS+15)*4)(3)
- stw r30,((JB_GPRS+16)*4)(3)
- stw r31,((JB_GPRS+17)*4)(3)
+ SAVE_GP (17)
+ SAVE_GP (18)
+ SAVE_GP (19)
+ SAVE_GP (20)
+ SAVE_GP (21)
+ SAVE_GP (22)
+ SAVE_GP (23)
+ SAVE_GP (24)
+ SAVE_GP (25)
+ SAVE_GP (26)
+ SAVE_GP (27)
+ SAVE_GP (28)
+ SAVE_GP (29)
+ SAVE_GP (30)
+ SAVE_GP (31)
#if defined NOT_IN_libc && defined IS_IN_rtld
li r3,0
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp.S b/libc/sysdeps/powerpc/powerpc32/setjmp.S
index 8a8cf0d6e..49b64ecf0 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp.S
@@ -25,7 +25,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -35,7 +35,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
index 839f6a4b9..b3d0af830 100644
--- a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("lwz %0,-28680(2)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("lwz %0,%1(2)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
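
POINTER_CHK_GUARD above loads the per-thread pointer guard from the TCB. A hypothetical use, assuming the plain XOR mangling scheme (PTR_MANGLE on some targets also rotates, so treat this purely as a sketch; demangle_ptr is not a glibc function):

  #include <stdint.h>

  /* Hypothetical sketch: undo XOR mangling with the guard fetched above.
     Assumes stackguard-macros.h and the TCB definitions are in scope.  */
  static inline uintptr_t
  demangle_ptr (uintptr_t stored)
  {
    return stored ^ POINTER_CHK_GUARD;
  }
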
diff --git a/libc/sysdeps/powerpc/powerpc32/stpcpy.S b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
index 03c6dddc3..7e106e0e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
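
The two L(g1) tails above differ only in which shift extracts each byte: on little-endian the low byte of the word is the first byte in memory, on big-endian the high byte is. A C sketch of that indexing (the helper name and memory-order convention are illustrative):

  #include <stdint.h>

  /* Illustrative sketch: byte i (memory order, i = 0..3) of a word
     loaded from memory, mirroring the LE/BE rlwinm shift pairs above.  */
  static inline unsigned char
  byte_at (uint32_t w, unsigned i)
  {
  #ifdef __LITTLE_ENDIAN__
    return (w >> (8 * i)) & 0xff;
  #else
    return (w >> (24 - 8 * i)) & 0xff;
  #endif
  }
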
diff --git a/libc/sysdeps/powerpc/powerpc32/strchr.S b/libc/sysdeps/powerpc/powerpc32/strchr.S
index c9952eecc..605056577 100644
--- a/libc/sysdeps/powerpc/powerpc32/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/strchr.S
@@ -36,6 +36,8 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
rlwimi rCHR, rCHR, 8, 16, 23
@@ -49,64 +51,93 @@ ENTRY (strchr)
addi r7F7F, r7F7F, 0x7f7f
/* Test the first (partial?) word. */
lwz rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rIGN
+#else
srw rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):lwzu rWORD, 4(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ lwzu rWORD, 4(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be a false match in the byte immediately more significant
+ than a true match, due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmplw rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 32-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzw rCLZB, rTMP2
cmplw rWORD, rTMP2
bgtlr
- cntlzw rCLZB, rTMP2
+#endif
srwi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 32-7-32
+ srawi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzw rCLZB, rTMP2
subi rSTR, rSTR, 4
srwi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
diff --git a/libc/sysdeps/powerpc/powerpc32/strcmp.S b/libc/sysdeps/powerpc/powerpc32/strcmp.S
index 297ca3c1b..91d60c905 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcmp.S
@@ -24,7 +24,7 @@
EALIGN (strcmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -34,6 +34,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
or rTMP, rSTR2, rSTR1
@@ -56,10 +57,45 @@ L(g1): add rTMP, rFEFE, rWORD1
and. rTMP, rTMP, rNEG
cmpw cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ rlwimi rTMP2, rTMP2, 1, 0, 30
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
@@ -84,7 +120,7 @@ L(different):
L(highbit):
ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/strcpy.S b/libc/sysdeps/powerpc/powerpc32/strcpy.S
index 4ae577dbb..e938cc42a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stb rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stb rTMP, 5(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stb rTMP, 6(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stb rTMP, 7(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stb rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stb rALT, 7(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/strlen.S b/libc/sysdeps/powerpc/powerpc32/strlen.S
index 9a6eafc38..a7153ed7a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly a false 0x80 in the byte immediately more
+ significant than a true match, due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
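
Both expressions are easy to verify in C.  A small sketch of the two
methods just described (helper names hypothetical):

#include <stdint.h>

/* Method (1): nonzero iff 'x' contains a 0x00 byte.  Carries may also
   set a false 0x80 in the byte just above a true match, which only the
   little-endian (lowest match) scan can tolerate.  */
static uint32_t
has_zero_byte_m1 (uint32_t x)
{
  return (x + 0xfefefeffU) & ~(x | 0x7f7f7f7fU);
}

/* Method (2): exactly 0x80 in every byte of 'x' that is 0x00 and 0x00
   elsewhere -- no false matches, as the big-endian scan requires.  */
static uint32_t
has_zero_byte_m2 (uint32_t x)
{
  return ~(((x & 0x7f7f7f7fU) + 0x7f7f7f7fU) | x | 0x7f7f7f7fU);
}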
@@ -72,7 +77,7 @@
ENTRY (strlen)
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -82,9 +87,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
clrrwi rSTR, rRTN, 2
@@ -93,15 +98,20 @@ ENTRY (strlen)
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
- We make an exception and use method (2) on the first two words, to reduce
- overhead. */
+/* We use method (2) on the first two words, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rPADN
+#else
srw rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
lis rFEFE, -0x101
@@ -110,11 +120,12 @@ ENTRY (strlen)
bt 29, L(loop)
/* Handle second word of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -128,28 +139,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero,
+ and 0x00 otherwise. */
L(done0):
- cntlzw rTMP3, rWORD1
+ cntlzw rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 32-7
+ srwi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzw rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 32-7-32
+ srawi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
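
The little-endian L(done0) sequence above converts the 0x80 match bits
into a byte index by masking off everything at and above the least
significant set bit and counting what remains.  The same arithmetic in C
(a sketch, assuming GCC's __builtin_clz):

#include <stdint.h>

/* 'marks' has 0x80 in each byte of the word that was zero; return the
   index of the lowest such byte.  */
static unsigned int
first_zero_byte_le (uint32_t marks)
{
  uint32_t below = (marks - 1) & ~marks;  /* addi/andc: ones below LS bit */
  /* Match bits sit at positions 7/15/23/31, so 'below' is never 0.  */
  return ((32 - 7) - __builtin_clz (below)) >> 3;  /* cntlzw/subfic/srwi */
}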
diff --git a/libc/sysdeps/powerpc/powerpc32/strncmp.S b/libc/sysdeps/powerpc/powerpc32/strncmp.S
index fa345d293..e36a160a8 100644
--- a/libc/sysdeps/powerpc/powerpc32/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -73,12 +74,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -86,28 +120,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
index 70c370439..4f1e3c88d 100644
--- a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -57,7 +57,7 @@ ENTRY (__longjmp)
beq L(no_vmx)
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
- lwz r0,((JB_VRSAVE)*8)(3)
+ lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
mtspr VRSAVE,r0
beq+ L(aligned_restore_vmx)
addi r6,r5,16
@@ -153,7 +153,7 @@ L(no_vmx):
lfd fp21,((JB_FPRS+7)*8)(r3)
ld r22,((JB_GPRS+8)*8)(r3)
lfd fp22,((JB_FPRS+8)*8)(r3)
- ld r0,(JB_CR*8)(r3)
+ lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. */
ld r23,((JB_GPRS+9)*8)(r3)
lfd fp23,((JB_FPRS+9)*8)(r3)
ld r24,((JB_GPRS+10)*8)(r3)
diff --git a/libc/sysdeps/powerpc/powerpc64/dl-machine.h b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
index 059fdafd5..18cf15738 100644
--- a/libc/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -561,6 +561,12 @@ elf_machine_rela (struct link_map *map,
Elf64_Addr *const reloc_addr = reloc_addr_arg;
const int r_type = ELF64_R_TYPE (reloc->r_info);
const Elf64_Sym *const refsym = sym;
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ uint64_t u8;
+ } __attribute__ ((__packed__));
if (r_type == R_PPC64_RELATIVE)
{
@@ -741,23 +747,11 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_UADDR64:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 56) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 48) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 40) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 32) & 0xff;
- ((char *) reloc_addr_arg)[4] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[5] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[6] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[7] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u8 = value;
return;
case R_PPC64_UADDR32:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u4 = value;
return;
case R_PPC64_ADDR32:
@@ -781,10 +775,8 @@ elf_machine_rela (struct link_map *map,
case R_PPC64_UADDR16:
if (dont_expect ((value + 0x8000) >= 0x10000))
_dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym);
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 0) & 0xff;
- break;
+ ((union unaligned *) reloc_addr)->u2 = value;
+ return;
case R_PPC64_ADDR16_DS:
if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0))
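
The R_PPC64_UADDR* cases above replace hand-written big-endian byte
stores with a type-punned packed union: the compiler then emits an
unaligned store that is correct for either byte order.  A standalone
sketch of the technique:

#include <stdint.h>

union unaligned
{
  uint16_t u2;
  uint32_t u4;
  uint64_t u8;
} __attribute__ ((__packed__));

/* Store a 64-bit relocation value at a possibly misaligned address.
   The packed attribute licenses the misaligned access, and the store
   is endian-neutral, unlike the old explicit byte shuffling.  */
static void
store_uaddr64 (void *reloc_addr, uint64_t value)
{
  ((union unaligned *) reloc_addr)->u8 = value;
}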
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 801af5d0a..45f71d7ac 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__ceilf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index a0a22e7eb..e85b820b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__floorf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
index 876707c76..b1a2b8cd6 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
@@ -26,8 +26,10 @@
/* float [fp1] nearbyintf(float [fp1]) */
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__nearbyintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index cb28ec748..188771742 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__rintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index 980a77bde..4f2c85163 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -19,10 +19,12 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
.LC1: /* 0.5 */
- .tc FD_3f000000_0[TC],0x3f00000000000000
+ .long 0x3f000000
+
.section ".text"
/* float [fp1] roundf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index 5ea5f3d04..b8fd05031 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
/* float [fp1] truncf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/memcpy.S b/libc/sysdeps/powerpc/powerpc64/memcpy.S
index b8c4cc8b1..5fc7401c9 100644
--- a/libc/sysdeps/powerpc/powerpc64/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/memcpy.S
@@ -212,15 +212,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -328,7 +341,11 @@ EALIGN (memcpy, 5, 0)
ld 7,8(5)
subfic 9,10,64
beq 2f
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+#else
sld 0,6,10
+#endif
cmpldi 11,1
mr 6,7
addi 4,4,-8
@@ -336,15 +353,25 @@ EALIGN (memcpy, 5, 0)
b 1f
2: addi 5,5,8
.align 4
+#ifdef __LITTLE_ENDIAN__
+0: srd 0,6,10
+ sld 8,7,9
+#else
0: sld 0,6,10
srd 8,7,9
+#endif
cmpldi 11,2
ld 6,8(5)
or 0,0,8
addi 11,11,-2
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+1: sld 8,6,9
+#else
sld 0,7,10
1: srd 8,6,9
+#endif
or 0,0,8
beq 8f
ld 7,16(5)
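
In the shifted-copy loop above each stored doubleword is assembled from
two aligned loads, and the #ifdef swaps the shift directions because
byte significance reverses with endianness.  One merge step in C (a
sketch; it assumes r10 holds the misalignment in bits, with r9 = 64 - r10
per the subfic above):

#include <stdint.h>

/* 'off' is the byte misalignment, 1-7 on this path, so both shift
   counts stay in 8..56 and are well defined in C.  */
static uint64_t
merge_doublewords (uint64_t first, uint64_t second, unsigned int off)
{
  unsigned int s1 = 8 * off;        /* r10 */
  unsigned int s2 = 64 - s1;        /* r9 */
#ifdef __LITTLE_ENDIAN__
  return (first >> s1) | (second << s2);   /* srd/sld pair */
#else
  return (first << s1) | (second >> s2);   /* sld/srd pair */
#endif
}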
diff --git a/libc/sysdeps/powerpc/powerpc64/memset.S b/libc/sysdeps/powerpc/powerpc64/memset.S
index 6acf149c8..1027a592c 100644
--- a/libc/sysdeps/powerpc/powerpc64/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/memset.S
@@ -55,14 +55,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -84,14 +84,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -212,7 +212,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
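
The insrdi forms above express the fill-byte replication directly in
64-bit terms.  The three doubling steps, written out in C:

#include <stdint.h>

static uint64_t
replicate_fill_byte (uint64_t c)
{
  c &= 0xff;
  c |= c << 8;    /* insrdi rCHR, rCHR, 8, 48: byte -> halfword */
  c |= c << 16;   /* insrdi rCHR, rCHR, 16, 32: halfword -> word */
  c |= c << 32;   /* insrdi rCHR, rCHR, 32, 0: word -> doubleword */
  return c;
}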
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
index 69caedc9f..80d67c9aa 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized memcmp implementation for PowerPC64.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,107 +36,127 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmpldi cr6, rN, 0
cmpldi cr1, rN, 12
- clrldi. rTMP, rTMP, 61
- clrldi rBITDIF, rSTR1, 61
- cmpldi cr5, rBITDIF, 0
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
- .align 4
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+ .align 4
L(samealignment):
clrrdi rSTR1, rSTR1, 3
clrrdi rSTR2, rSTR2, 3
beq cr5, L(DWaligned)
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 8 */
- .align 3
+ .align 3
L(dsP1):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
- .align 4
+ .align 4
L(dPs2):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(dPs3):
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD2, r11
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD2, r11
- cmpld cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWaligned):
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- srdi rTMP, rN, 5 /* Divide by 32 */
- cmpldi cr1, rBITDIF, 16
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dP4)
@@ -143,174 +164,343 @@ L(DWaligned):
beq cr1, L(dP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- .align 3
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ .align 3
L(dP1x):
sldi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 16 */
- .align 4
+ .align 4
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr5, rWORD3, rWORD4
+#endif
+ cmpld cr1, rWORD3, rWORD4
sldi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 24 */
- .align 4
+ .align 4
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 0(rSTR1)
ld rWORD4, 0(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare); we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr5, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
sldi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne cr5, L(dLcr5)
+ bne cr7, L(dLcr7x)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -320,7 +510,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -329,60 +519,74 @@ L(d14):
sldi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- cmpld cr5, rWORD1, rWORD2
- bne cr5, L(dLcr5x)
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
li rRTN, 0
blr
- .align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+
+ .align 4
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
li rRTN, 1
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
li rRTN, 1
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
li rRTN, 1
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+#if 0
+/* Huh? We've already branched on cr6! */
beq- cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -397,7 +601,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -405,11 +609,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmpld cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -418,7 +622,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -435,23 +639,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -466,14 +670,14 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
@@ -481,101 +685,106 @@ L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
- .align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+ of r12 to 0. If r12 == 0 then rStr1 is double word
aligned and we can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair. */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
clrldi rSHL, rSTR2, 61
beq- cr6, L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
beq cr5, L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
-/* Compute the left/right shift counts for the unalign rSTR2,
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL, r27, 61
+ clrldi rSHL, rWORD8_SHIFT, 61
clrrdi rSTR1, rSTR1, 3
- std r25,-56(r1)
- cfi_offset(r25,-56)
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
sldi rSHL, rSHL, 3
- cmpld cr5, r27, rSTR2
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
subfic rSHR, rSHL, 64
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD8, 0(rSTR2)
- la rSTR2, 8(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
sld rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
- srd rG, rWORD2, rSHR
+ srd r12, rWORD2, rSHR
clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dusP1):
- sld rB, rWORD2, rSHL
- sld rWORD7, rWORD1, r11
- sld rWORD8, rWORD8, r11
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
@@ -585,95 +794,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duPs2):
- sld rH, rWORD2, rSHL
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD8, r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duPs3):
- sld rF, rWORD2, rSHL
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD8, r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- sld rD, rWORD2, rSHL
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP, rN, 5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD6, 0(rSTR2)
ldu rWORD8, 8(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
subfic rSHR, rSHL, 64
- sld rH, rWORD6, rSHL
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP1):
- srd rG, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD7, 0(rSTR1)
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -683,186 +930,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duP2):
- srd rE, rWORD8, rSHR
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- sld rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duP3):
- srd rC, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD3, 0(rSTR1)
- sld rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srd rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD1, 0(rSTR1)
- sld rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmpld cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -872,103 +1254,110 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
- .align 4
+#endif
+ srd r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
ld rWORD1, 8(rSTR1)
- ld rWORD8,-8(r1)
+#endif
+ ld rWORD8, -8(r1)
subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
- or rWORD2, rA, rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- ld r28,-32(r1)
- ld r27,-40(r1)
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
li rRTN, 0
- cmpld cr0, rWORD1, rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0, L(dureturn24)
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
li rRTN, 1
- ld r24,-64(r1)
- bgtlr cr0
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
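
The ldbrx/addi pairs that pervade the little-endian path above load each
doubleword byte-reversed, so the unsigned cmpld orders doublewords by
memory order exactly as the plain ld does on big-endian.  A C model of
that load (assuming GCC's __builtin_bswap64):

#include <stdint.h>
#include <string.h>

/* Load 8 bytes so the byte at the lowest address is most significant,
   whatever the host byte order.  */
static uint64_t
load_be_doubleword (const void *p)
{
  uint64_t w;
  memcpy (&w, p, sizeof w);     /* plain (host-order) load */
#ifdef __LITTLE_ENDIAN__
  w = __builtin_bswap64 (w);    /* ldbrx effect */
#endif
  return w;
}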
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
index 4317c7e78..f9a7260dc 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -214,15 +214,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -334,13 +347,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -349,8 +372,13 @@ EALIGN (memcpy, 5, 0)
blt cr6,8f /* if total DWs = 3, then bypass loop */
bf 31,4f
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -361,8 +389,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
addi 5,5,16
or 0,0,8
bf 31,4f
@@ -373,23 +406,44 @@ EALIGN (memcpy, 5, 0)
addi 4,4,8
.align 4
/* copy 32 bytes at a time */
-4: sld 0,6,10
+4:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -399,8 +453,13 @@ EALIGN (memcpy, 5, 0)
.align 4
8:
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
3:
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
index dbecee8b9..ad0d38128 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -50,14 +50,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -79,14 +79,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -146,24 +146,24 @@ L(zloopstart):
L(getCacheAligned):
cmpldi cr1,rLEN,32
andi. rTMP,rMEMP,127
- blt cr1,L(handletail32)
- beq L(cacheAligned)
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
addi rMEMP,rMEMP,32
addi rLEN,rLEN,-32
- std rCHR,-32(rMEMP)
- std rCHR,-24(rMEMP)
- std rCHR,-16(rMEMP)
- std rCHR,-8(rMEMP)
- b L(getCacheAligned)
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmpld cr1,rLEN,rCLS
- blt cr1,L(handletail32)
+ blt cr1,L(handletail32)
dcbz 0,rMEMP
subf rLEN,rCLS,rLEN
- add rMEMP,rMEMP,rCLS
- b L(cacheAligned)
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
/* We are here because the cache line size was set and was not 32-bytes
and the remainder (rLEN) is less than the actual cache line size.
@@ -200,7 +200,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
index 1276e16a5..5d136cfa2 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -38,6 +38,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -79,12 +80,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -93,7 +141,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -101,7 +149,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -109,11 +157,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
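
In the little-endian L(endstring)/L(different) paths added above, the first
byte in memory is the least-significant byte of the loaded doubleword, so the
return value must be decided by the lowest differing byte rather than the
highest. The assembly isolates that byte's bit with neg/and and normalizes it
with cntlzd; an equivalent C sketch of the idea (hypothetical helper, assumes
w1 != w2 and unsigned byte order):

#include <stdint.h>

static int
first_diff_le (uint64_t w1, uint64_t w2)
{
  uint64_t diff = w1 ^ w2;                            /* bits that differ */
  unsigned shift = (__builtin_ctzll (diff) / 8) * 8;  /* lowest differing byte */
  unsigned b1 = (w1 >> shift) & 0xff;
  unsigned b2 = (w2 >> shift) & 0xff;
  return b1 < b2 ? -1 : 1;
}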
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
index d6d242d29..e3f3d8a30 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -400,15 +400,28 @@ L(das_tail2):
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -595,13 +608,24 @@ L(du1_do):
bf 30,L(du1_1dw)
/* there are at least two DWs to copy */
+ /* FIXME: can combine last shift and "or" into "rldimi" */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -610,8 +634,13 @@ L(du1_do):
blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du1_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -622,8 +651,13 @@ L(du1_do):
b L(du1_loop)
.align 4
L(du1_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du1_loop)
@@ -635,23 +669,43 @@ L(du1_1dw):
.align 4
/* copy 32 bytes at a time */
L(du1_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -661,8 +715,13 @@ L(du1_loop):
.align 4
L(du1_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -672,13 +731,23 @@ L(du2_do):
bf 30,L(du2_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -687,8 +756,13 @@ L(du2_do):
blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du2_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -699,8 +773,13 @@ L(du2_do):
b L(du2_loop)
.align 4
L(du2_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du2_loop)
@@ -712,23 +791,43 @@ L(du2_1dw):
.align 4
/* copy 32 bytes at a time */
L(du2_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -738,8 +837,13 @@ L(du2_loop):
.align 4
L(du2_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -749,13 +853,23 @@ L(du3_do):
bf 30,L(du3_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -764,8 +878,13 @@ L(du3_do):
blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du3_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -776,8 +895,13 @@ L(du3_do):
b L(du3_loop)
.align 4
L(du3_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du3_loop)
@@ -789,23 +913,43 @@ L(du3_1dw):
.align 4
/* copy 32 bytes at a time */
L(du3_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -815,8 +959,13 @@ L(du3_loop):
.align 4
L(du3_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -832,13 +981,23 @@ L(du4_dox):
bf 30,L(du4_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -847,8 +1006,13 @@ L(du4_dox):
blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du4_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -859,8 +1023,13 @@ L(du4_dox):
b L(du4_loop)
.align 4
L(du4_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du4_loop)
@@ -872,23 +1041,43 @@ L(du4_1dw):
.align 4
/* copy 32 bytes at a time */
L(du4_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -898,8 +1087,13 @@ L(du4_loop):
.align 4
L(du4_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -909,13 +1103,23 @@ L(du5_do):
bf 30,L(du5_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -924,8 +1128,13 @@ L(du5_do):
blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du5_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -936,8 +1145,13 @@ L(du5_do):
b L(du5_loop)
.align 4
L(du5_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du5_loop)
@@ -949,23 +1163,43 @@ L(du5_1dw):
.align 4
/* copy 32 bytes at a time */
L(du5_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -975,8 +1209,13 @@ L(du5_loop):
.align 4
L(du5_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -986,13 +1225,23 @@ L(du6_do):
bf 30,L(du6_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1001,8 +1250,13 @@ L(du6_do):
blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du6_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1013,8 +1267,13 @@ L(du6_do):
b L(du6_loop)
.align 4
L(du6_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du6_loop)
@@ -1026,23 +1285,43 @@ L(du6_1dw):
.align 4
/* copy 32 bytes at a time */
L(du6_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1052,8 +1331,13 @@ L(du6_loop):
.align 4
L(du6_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -1063,13 +1347,23 @@ L(du7_do):
bf 30,L(du7_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1078,8 +1372,13 @@ L(du7_do):
blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du7_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1090,8 +1389,13 @@ L(du7_do):
b L(du7_loop)
.align 4
L(du7_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du7_loop)
@@ -1103,23 +1407,43 @@ L(du7_1dw):
.align 4
/* copy 32 bytes at a time */
L(du7_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1129,8 +1453,13 @@ L(du7_loop):
.align 4
L(du7_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memset.S b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
index 3e8ae2d25..d61988a19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -47,14 +47,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -76,14 +76,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -344,7 +344,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
index d0071c765..ebec0e0ba 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -39,10 +39,8 @@ EALIGN (__finite, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
-
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r4,r4,17 /* r4 = abs(r4). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
bltlr cr7 /* LT means finite, other non-finite. */
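
The s_finite hunk replaces a hard-coded lhz offset with -16+HISHORT so the
same load fetches the sign/exponent halfword on either endianness; HISHORT is
presumably defined elsewhere as 0 for big-endian and 6 for little-endian
doubles (an assumption, its definition is not part of this diff). What the
fetched 16 bits decide, sketched in C (illustrative helper only):

#include <stdint.h>
#include <string.h>

/* Nonzero iff x is finite: the 11-bit biased exponent is all ones
   exactly for Inf and NaN.  */
static int
finite_bits (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);            /* bit image of the double */
  unsigned hi = (unsigned) (u >> 48);   /* sign, exponent, top 4 mantissa bits */
  return (hi & 0x7ff0) != 0x7ff0;
}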
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
index 1aea12383..8d088db5a 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -38,9 +38,8 @@ EALIGN (__isinf, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
beqlr cr7 /* EQ means INF, otherwise -INF. */
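
The s_isinf hunk uses the same HISHORT load; here the sign bit is kept
because the return value distinguishes +Inf (1) from -Inf (-1). The full
check, sketched in C (illustrative helper only):

#include <stdint.h>
#include <string.h>

/* 1 for +Inf, -1 for -Inf, 0 otherwise.  */
static int
isinf_bits (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  if ((u & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
    return 0;                           /* finite or NaN */
  return (u >> 63) ? -1 : 1;            /* sign selects the result */
}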
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
index 3416897f5..5076dd0c1 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -25,109 +25,112 @@ ENTRY (__memchr)
CALL_MCOUNT 2
dcbt 0,r3
clrrdi r8,r3,3
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmpldi r5,32
+ li r9, -1
+ rlwinm r6,r3,3,26,28 /* Calculate padding. */
insrdi r4,r4,32,0
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ sld r9,r9,r6
+#else
+ srd r9,r9,r6
+#endif
ble L(small_range)
- cmpld cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7),
- the size passed in was zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-
-L(proceed):
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in DWORD1. */
- beq cr6,L(proceed_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(proceed_no_padding):
- cmpldi cr7,r10,0 /* Does r10 indicate we got a hit? */
+ cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */
+ and r3,r3,r9
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */
bne cr7,L(done)
- /* See if we are at the last acceptable address yet. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 28,L(loop_setup)
/* Handle DWORD2 of pair. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srdi r6,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r6,r7,r8
+ srdi r6,r6,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+ it's a small loop (8 instructions), align it to 32-bytes. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
ld r12,8(r8)
ldu r11,16(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
- cmpldi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one doubleword. */
+ cmpldi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,8
- cmpld cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more dword to read. */
+ cmpld r8,r7
+ beqlr
+ ldu r12,8(r8)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
+ blr
+
+ .align 4
+L(found):
/* OK, one (or both) of the doublewords contains BYTE. Check
the first doubleword and decrement the address in case the first
doubleword really contains BYTE. */
- .align 4
-L(found):
- cmpldi cr6,r10,0
+ cmpldi cr6,r3,0
addi r8,r8,-8
bne cr6,L(done)
/* BYTE must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,8
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
doubleword from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
+ cntlzd r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmpld r8,r7 /* Are we on the last dword? */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmpld r3,r7
- bge L(null)
+ cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
.align 4
@@ -139,67 +142,44 @@ L(null):
.align 4
L(small_range):
cmpldi r5,0
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmpldi r5,0 above. */
+ beq L(null)
ld r12,0(r8) /* Load word from memory. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- /* If no padding, skip the shifts. */
- beq cr6,L(small_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(small_no_padding):
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpld r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,8
- cmpld r9,r7
- bge L(null)
- /* If we're not done, drop through into loop_small. */
-
-L(loop_small): /* loop_small has been unrolled. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done) /* Found something. */
- bge L(null) /* Hit end of string (length). */
+ beqlr /* Hit end of string (length). */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
- bge L(null)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
+ bne cr6,L(done)
+ beqlr
ldu r12,8(r8)
- subi r11,r7,8
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- ori r2,r2,0 /* Force a dispatch group. */
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done)
+ beqlr
- cmpld r8,r11 /* At end of range? */
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover double that still needs to be
- checked. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
-
- /* Save a branch and exit directly. */
- li r3,0
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
blr
-
END (__memchr)
weak_alias (__memchr, memchr)
libc_hidden_builtin_def (memchr)
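
In the rewritten memchr above, cmpb leaves 0xff in every byte position where
the loaded doubleword matches the splatted search byte. Converting that mask
to the offset of the first match is a leading-zero count on big-endian but a
trailing-zero count on little-endian, which the new L(done) code derives as
popcntd((x - 1) & ~x). A C sketch of the same step (illustrative helper,
assumes mask != 0):

#include <stdint.h>

static unsigned
first_match_offset (uint64_t mask)
{
#ifdef __LITTLE_ENDIAN__
  return __builtin_ctzll (mask) >> 3;  /* trailing zeros -> byte offset */
#else
  return __builtin_clzll (mask) >> 3;  /* leading zeros -> byte offset */
#endif
}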
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
index f190c6461..6851cdc75 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,354 +36,557 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmpldi cr6,rN,0
- cmpldi cr1,rN,12
- clrldi. rTMP,rTMP,61
- clrldi rBITDIF,rSTR1,61
- cmpldi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ xor r0, rSTR2, rSTR1
+ cmpldi cr6, rN, 0
+ cmpldi cr1, rN, 12
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
- blt cr1,L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ blt cr1, L(bytealigned)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrdi rSTR1,rSTR1,3
- clrrdi rSTR2,rSTR2,3
- beq cr5,L(DWaligned)
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ clrrdi rSTR1, rSTR1, 3
+ clrrdi rSTR2, rSTR2, 3
+ beq cr5, L(DWaligned)
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 8 */
.align 3
L(dsP1):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
.align 4
L(dPs2):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
.align 4
L(dPs3):
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD2,r11
- cmpld cr1,rWORD3,rWORD4
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
+ cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD2,r11
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWaligned):
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- srdi rTMP,rN,5 /* Divide by 32 */
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 8 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ bne cr6, L(dLcr6)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
.align 3
L(dP1x):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 16 */
.align 4
L(dP2):
- mtctr rTMP
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
L(dP2e):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), so we want
to use only volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr5,rWORD3,rWORD4
- sldi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ sldi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 24 */
.align 4
L(dP3):
- mtctr rTMP
- ld rWORD3,0(rSTR1)
- ld rWORD4,0(rSTR2)
- cmpld cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
L(dP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), so we want
to use only volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD1,rWORD2
- sldi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr6,L(dLcr6)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne cr7, L(dLcr7x)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dP4):
- mtctr rTMP
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmpld cr5,rWORD7,rWORD8
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- cmpld cr5,rWORD1,rWORD2
- bne cr5,L(dLcr5x)
- li rRTN,0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
+ li rRTN, 0
blr
+
.align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
- li rRTN,1
+ li rRTN, 1
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
mtctr rN
- beq cr6,L(zeroLength)
+#if 0
+/* Huh? We've already branched on cr6! */
+ beq cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -396,38 +598,38 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmpld cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmpld cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmpld cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmpld cr6,rWORD5,rWORD6
+ cmpld cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmpld cr0,rWORD1,rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmpld cr1,rWORD3,rWORD4
+ cmpld cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -437,542 +639,727 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
.align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+	of r12 to 0.  If r12 == 0 then rSTR1 is double word
aligned and we can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not yet DW aligned.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair. */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
- clrldi rSHL,rSTR2,61
- beq cr6,L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
- beq cr5,L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
+ clrldi rSHL, rSTR2, 61
+ beq cr6, L(duzeroLength)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
+ beq cr5, L(DWunaligned)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL,r27,61
- clrrdi rSTR1,rSTR1,3
- std r25,-56(r1)
- cfi_offset(r25,-56)
- sldi rSHL,rSHL,3
- cmpld cr5,r27,rSTR2
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- std r24,-64(r1)
- cfi_offset(r24,-64)
- subfic rSHR,rSHL,64
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
+ clrldi rSHL, rWORD8_SHIFT, 61
+ clrrdi rSTR1, rSTR1, 3
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ subfic rSHR, rSHL, 64
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- ld rWORD8,0(rSTR2)
- la rSTR2,8(rSTR2)
- sld rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
+ sld rWORD8, rWORD8, rSHL
L(dus0):
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- srd rG,rWORD2,rSHR
- clrldi rN,rN,61
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ srd r12, rWORD2, rSHR
+ clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 8 */
.align 4
L(dusP1):
- sld rB,rWORD2,rSHL
- sld rWORD7,rWORD1,r11
- sld rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duPs2):
- sld rH,rWORD2,rSHL
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD8,r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
.align 4
L(duPs3):
- sld rF,rWORD2,rSHL
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD8,r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- sld rD,rWORD2,rSHL
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP,rN,5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
- sldi rSHL,rSHL,3
- ld rWORD6,0(rSTR2)
- ldu rWORD8,8(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
- subfic rSHR,rSHL,64
- sld rH,rWORD6,rSHL
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD6, 0(rSTR2)
+ ldu rWORD8, 8(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ subfic rSHR, rSHL, 64
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 8 */
.align 4
L(duP1):
- srd rG,rWORD8,rSHR
- ld rWORD7,0(rSTR1)
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD7, 0(rSTR1)
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmpld cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duP2):
- srd rE,rWORD8,rSHR
- ld rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- sld rH,rWORD8,rSHL
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmpld cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmpld cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
.align 4
L(duP3):
- srd rC,rWORD8,rSHR
- ld rWORD3,0(rSTR1)
- sld rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD3, 0(rSTR1)
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(duLcr1)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srd rA,rWORD8,rSHR
- ld rWORD1,0(rSTR1)
- sld rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD1, 0(rSTR1)
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmpld cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne- cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmpld cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmpld cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
.align 4
L(dutrim):
- ld rWORD1,8(rSTR1)
- ld rWORD8,-8(r1)
- subfic rN,rN,64 /* Shift count is 64 - (rN * 8). */
- or rWORD2,rA,rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- ld r28,-32(r1)
- ld r27,-40(r1)
- li rRTN,0
- cmpld cr0,rWORD1,rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0,L(dureturn24)
- li rRTN,1
- ld r24,-64(r1)
- bgtlr cr0
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
+ ld rWORD1, 8(rSTR1)
+#endif
+ ld rWORD8, -8(r1)
+ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
+ li rRTN, 0
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
+ li rRTN, 1
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
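
The tail handling above (L(dutrim), and the comment before L(du14)) reduces the last 1 to 7 bytes to a single doubleword compare by shifting out the bytes beyond the compare length. A minimal C sketch of that reduction, with illustrative names that are not taken from the source:

  #include <stdint.h>

  /* Compare the first n (1..7) bytes of two doublewords loaded in
     big-endian byte order (ld, or ldbrx on little-endian), mirroring
     the sldi/subfic/srd/cmpld sequence around L(dutrim).  */
  static int
  dutrim_sketch (uint64_t w1, uint64_t w2, unsigned int n)
  {
    unsigned int shift = 64 - 8 * n;  /* sldi. rN,rN,3; subfic rN,rN,64 */
    w1 >>= shift;                     /* srd rWORD1,rWORD1,rN */
    w2 >>= shift;                     /* srd rWORD2,rWORD2,rN */
    if (w1 == w2)
      return 0;                       /* beq cr7,L(dureturn24) */
    return w1 > w2 ? 1 : -1;          /* li rRTN,1 / bgtlr / li rRTN,-1 */
  }
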
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 800a9f1bb..e8df75f59 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -23,418 +23,361 @@
/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
Returns 'dst'. */
+#define dst 11 /* Use r11 so r3 kept unchanged. */
+#define src 4
+#define cnt 5
+
.machine power7
EALIGN (memcpy, 5, 0)
CALL_MCOUNT 3
- cmpldi cr1,5,31
+ cmpldi cr1,cnt,31
neg 0,3
- std 3,-16(1)
- std 31,-8(1)
- cfi_offset(31,-8)
ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
code. */
- andi. 11,3,7 /* Check alignment of DST. */
-
-
- clrldi 10,4,61 /* Check alignment of SRC. */
- cmpld cr6,10,11 /* SRC and DST alignments match? */
- mr 12,4
- mr 31,5
+#ifdef __LITTLE_ENDIAN__
+/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
+ or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
+ loop is only used for quadword aligned copies. */
+ andi. 10,3,15
+ clrldi 11,4,60
+#else
+ andi. 10,3,7 /* Check alignment of DST. */
+ clrldi 11,4,61 /* Check alignment of SRC. */
+#endif
+ cmpld cr6,10,11 /* SRC and DST alignments match? */
+
+ mr dst,3
bne cr6,L(copy_GE_32_unaligned)
+ beq L(aligned_copy)
- srdi 9,5,3 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_aligned_cont)
-
- clrldi 0,0,61
- mtcrf 0x01,0
- subf 31,0,5
-
- /* Get the SRC aligned to 8 bytes. */
-
-1: bf 31,2f
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: bf 30,4f
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: bf 29,0f
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-0:
- clrldi 10,12,61 /* Check alignment of SRC again. */
- srdi 9,31,3 /* Number of full doublewords remaining. */
-
-L(copy_GE_32_aligned_cont):
-
- clrldi 11,31,61
- mtcrf 0x01,9
-
- srdi 8,31,5
- cmpldi cr1,9,4
- cmpldi cr6,11,0
- mr 11,12
-
- /* Copy 1~3 doublewords so the main loop starts
- at a multiple of 32 bytes. */
+ mtocrf 0x01,0
+#ifdef __LITTLE_ENDIAN__
+ clrldi 0,0,60
+#else
+ clrldi 0,0,61
+#endif
- bf 30,1f
- ld 6,0(12)
- ld 7,8(12)
- addi 11,12,16
- mtctr 8
- std 6,0(3)
- std 7,8(3)
- addi 10,3,16
- bf 31,4f
- ld 0,16(12)
- std 0,16(3)
- blt cr1,3f
- addi 11,12,24
- addi 10,3,24
- b 4f
-
- .align 4
-1: /* Copy 1 doubleword and set the counter. */
- mr 10,3
- mtctr 8
- bf 31,4f
- ld 6,0(12)
- addi 11,12,8
- std 6,0(3)
- addi 10,3,8
-
-L(aligned_copy):
- /* Main aligned copy loop. Copies up to 128-bytes at a time. */
- .align 4
+/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */
+1:
+ bf 31,2f
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
+ bf 30,4f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
4:
- /* check for any 32-byte or 64-byte lumps that are outside of a
- nice 128-byte range. R8 contains the number of 32-byte
- lumps, so drop this into the CR, and use the SO/EQ bits to help
- handle the 32- or 64- byte lumps. Then handle the rest with an
- unrolled 128-bytes-at-a-time copy loop. */
- mtocrf 1,8
- li 6,16 # 16() index
- li 7,32 # 32() index
- li 8,48 # 48() index
-
-L(aligned_32byte):
- /* if the SO bit (indicating a 32-byte lump) is not set, move along. */
- bns cr7,L(aligned_64byte)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 11,11,32
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- addi 10,10,32
-
-L(aligned_64byte):
- /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */
- bne cr7,L(aligned_128setup)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
-
-L(aligned_128setup):
- /* Set up for the 128-byte at a time copy loop. */
- srdi 8,31,7
- cmpdi 8,0 # Any 4x lumps left?
- beq 3f # if not, move along.
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- mtctr 8 # otherwise, load the ctr and begin.
- li 8,48 # 48() index
+ bf 29,8f
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
+#ifdef __LITTLE_ENDIAN__
+ bf 28,16f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+16:
+#endif
+ subf cnt,0,cnt
+
+/* Main aligned copy loop. Copies 128 bytes at a time. */
+L(aligned_copy):
+ li 6,16
+ li 7,32
+ li 8,48
+ mtocrf 0x02,cnt
+ srdi 12,cnt,7
+ cmpdi 12,0
+ beq L(aligned_tail)
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ mtctr 12
b L(aligned_128loop)
+ .align 4
L(aligned_128head):
/* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,11
- lxvd2x 7,11,6
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
L(aligned_128loop):
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- stxvd2x 6,0,10
- addi 11,11,64
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 10,10,64
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ stxvd2x 6,0,dst
+ addi src,src,64
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi dst,dst,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
bdnz L(aligned_128head)
-3:
- /* Check for tail bytes. */
- rldicr 0,31,0,60
- mtcrf 0x01,31
- beq cr6,0f
-
-.L9:
- add 3,3,0
- add 12,12,0
-
- /* At this point we have a tail of 0-7 bytes and we know that the
- destination is doubleword-aligned. */
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+L(aligned_tail):
+ mtocrf 0x01,cnt
+ bf 25,32f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
+32:
+ bf 26,16f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi src,src,32
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ addi dst,dst,32
+16:
+ bf 27,8f
+ lxvd2x 6,0,src
+ addi src,src,16
+ stxvd2x 6,0,dst
+ addi dst,dst,16
+8:
+ bf 28,4f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+4: /* Copies 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
- /* Handle copies of 0~31 bytes. */
- .align 4
+
+/* Handle copies of 0~31 bytes. */
+ .align 4
L(copy_LT_32):
- cmpldi cr6,5,8
- mr 12,4
- mtcrf 0x01,5
+ mr dst,3
+ cmpldi cr6,cnt,8
+ mtocrf 0x01,cnt
ble cr6,L(copy_LE_8)
/* At least 9 bytes to go. */
neg 8,4
- clrrdi 11,4,2
- andi. 0,8,3
- cmpldi cr1,5,16
- mr 10,5
+ andi. 0,8,3
+ cmpldi cr1,cnt,16
beq L(copy_LT_32_aligned)
- /* Force 4-bytes alignment for SRC. */
- mtocrf 0x01,0
- subf 10,0,5
-2: bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: bf 31,L(end_4bytes_alignment)
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-
- .align 4
+ /* Force 4-byte alignment for SRC. */
+ mtocrf 0x01,0
+ subf cnt,0,cnt
+2:
+ bf 30,1f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+1:
+ bf 31,L(end_4bytes_alignment)
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+
+ .align 4
L(end_4bytes_alignment):
- cmpldi cr1,10,16
- mtcrf 0x01,10
+ cmpldi cr1,cnt,16
+ mtocrf 0x01,cnt
L(copy_LT_32_aligned):
/* At least 6 bytes to go, and SRC is word-aligned. */
blt cr1,8f
/* Copy 16 bytes. */
- lwz 6,0(12)
- lwz 7,4(12)
- stw 6,0(3)
- lwz 8,8(12)
- stw 7,4(3)
- lwz 6,12(12)
- addi 12,12,16
- stw 8,8(3)
- stw 6,12(3)
- addi 3,3,16
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ lwz 8,8(src)
+ stw 7,4(dst)
+ lwz 6,12(src)
+ addi src,src,16
+ stw 8,8(dst)
+ stw 6,12(dst)
+ addi dst,dst,16
8: /* Copy 8 bytes. */
- bf 28,4f
+ bf 28,L(tail4)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+
+ .align 4
+/* Copies 4~7 bytes. */
+L(tail4):
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
+ blr
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2-3 bytes. */
+ .align 4
+/* Copies 2~3 bytes. */
+L(tail2):
bf 30,1f
-
- lhz 6,0(12)
- sth 6,0(3)
- bf 31,0f
- lbz 7,2(12)
- stb 7,2(3)
- ld 3,-16(1)
+ lhz 6,0(src)
+ sth 6,0(dst)
+ bflr 31
+ lbz 7,2(src)
+ stb 7,2(dst)
blr
- .align 4
-1: /* Copy 1 byte. */
- bf 31,0f
+ .align 4
+L(tail5):
+ bflr 31
+ lbz 6,4(src)
+ stb 6,4(dst)
+ blr
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 3,-16(1)
+ .align 4
+1:
+ bflr 31
+ lbz 6,0(src)
+ stb 6,0(dst)
+ /* Return original DST pointer. */
blr
- /* Handles copies of 0~8 bytes. */
- .align 4
+
+/* Handles copies of 0~8 bytes. */
+ .align 4
L(copy_LE_8):
- bne cr6,4f
+ bne cr6,L(tail4)
/* Though we could've used ld/std here, they are still
slow for unaligned cases. */
- lwz 6,0(4)
- lwz 7,4(4)
- stw 6,0(3)
- stw 7,4(3)
- ld 3,-16(1) /* Return original DST pointers. */
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ stw 7,4(dst)
blr
- .align 4
-4: /* Copies 4~7 bytes. */
- bf 29,2b
-
- lwz 6,0(4)
- stw 6,0(3)
- bf 30,5f
- lhz 7,4(4)
- sth 7,4(3)
- bf 31,0f
- lbz 8,6(4)
- stb 8,6(3)
- ld 3,-16(1)
- blr
-
- .align 4
-5: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,4(4)
- stb 6,4(3)
-
-0: /* Return original DST pointer. */
- ld 3,-16(1)
- blr
- /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
+/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
+ SRC is not. Use aligned quadword loads from SRC, shifted to realign
+ the data, allowing for aligned DST stores. */
+ .align 4
L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st
- quadword. */
- andi. 11,3,15 /* Check alignment of DST (against
- quadwords). */
- srdi 9,5,4 /* Number of full quadwords remaining. */
+ clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */
+#ifndef __LITTLE_ENDIAN__
+ andi. 10,3,15 /* Check alignment of DST (against quadwords). */
+#endif
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
- mtcrf 0x01,0
- subf 31,0,5
+ mtocrf 0x01,0
+ subf cnt,0,cnt
/* Vector instructions work best when proper alignment (16-bytes)
is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1: /* Copy 1 byte. */
+1:
bf 31,2f
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: /* Copy 2 bytes. */
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
bf 30,4f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: /* Copy 4 bytes. */
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+4:
bf 29,8f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-8: /* Copy 8 bytes. */
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
bf 28,0f
-
- ld 6,0(12)
- addi 12,12,8
- std 6,0(3)
- addi 3,3,8
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
0:
- clrldi 10,12,60 /* Check alignment of SRC. */
- srdi 9,31,4 /* Number of full quadwords remaining. */
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
/* The proper alignment is present, it is OK to copy the bytes now. */
L(copy_GE_32_unaligned_cont):
/* Setup two indexes to speed up the indexed vector operations. */
- clrldi 11,31,60
- li 6,16 /* Index for 16-bytes offsets. */
+ clrldi 10,cnt,60
+ li 6,16 /* Index for 16-bytes offsets. */
li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,11,0
- srdi 8,31,5 /* Setup the loop counter. */
- mr 10,3
- mr 11,12
- mtcrf 0x01,9
- cmpldi cr6,9,1
- lvsl 5,0,12
- lvx 3,0,12
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop . */
- lvx 4,12,6
- vperm 6,3,4,5
- addi 11,12,16
- addi 10,3,16
- stvx 6,0,3
+ cmpldi cr1,10,0
+ srdi 8,cnt,5 /* Setup the loop counter. */
+ mtocrf 0x01,9
+ cmpldi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,src
+#else
+ lvsl 5,0,src
+#endif
+ lvx 3,0,src
+ li 0,0
+ bf 31,L(setup_unaligned_loop)
+
+ /* Copy another 16 bytes to align to 32-bytes due to the loop. */
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ addi src,src,16
+ stvx 6,0,dst
+ addi dst,dst,16
vor 3,4,4
+ clrrdi 0,src,60
L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
+ mtctr 8
+ ble cr6,L(end_unaligned_loop)
/* Copy 32 bytes at a time using vector instructions. */
- .align 4
+ .align 4
L(unaligned_loop):
/* Note: vr6/vr10 may contain data that was already copied,
@@ -442,62 +385,55 @@ L(unaligned_loop):
some portions again. This is faster than having unaligned
vector instructions though. */
- lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
- lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
- addi 11,11,32
- stvx 6,0,10
- stvx 10,10,6
- addi 10,10,32
-
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ lvx 3,src,7
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
+ addi src,src,32
+ stvx 6,0,dst
+ stvx 10,dst,6
+ addi dst,dst,32
bdnz L(unaligned_loop)
- .align 4
+ clrrdi 0,src,60
+
+ .align 4
L(end_unaligned_loop):
/* Check for tail bytes. */
- rldicr 0,31,0,59
- mtcrf 0x01,31
- beq cr1,0f
+ mtocrf 0x01,cnt
+ beqlr cr1
- add 3,3,0
- add 12,12,0
+ add src,src,0
/* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
-8: /* Copy 8 bytes. */
+ /* Copy 8 bytes. */
bf 28,4f
-
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2~3 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+4: /* Copy 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
END_GEN_TB (memcpy,TB_TOCLESS)
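
The rewritten aligned path has a simple shape: a 128-byte main loop driven by the CTR, followed by a tail that peels 64/32/16/8/4/2/1-byte chunks by testing individual bits of the count (the mtocrf/bf pairs). A rough C model of that control flow, as a sketch only; the real loop issues lxvd2x/stxvd2x vector pairs, not memcpy calls:

  #include <stddef.h>
  #include <string.h>

  static void
  aligned_copy_sketch (char *dst, const char *src, size_t cnt)
  {
    for (size_t i = cnt >> 7; i > 0; i--)    /* srdi 12,cnt,7; mtctr 12 */
      {
        memcpy (dst, src, 128);              /* L(aligned_128loop) body */
        src += 128;
        dst += 128;
      }
    for (size_t chunk = 64; chunk >= 1; chunk >>= 1)
      if (cnt & chunk)                       /* bf 25..31 on the CR bits */
        {
          memcpy (dst, src, chunk);          /* L(aligned_tail) chunks */
          src += chunk;
          dst += chunk;
        }
  }
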
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
index f20be938d..b93ab7da5 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
@@ -365,13 +365,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmpldi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop.  */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -391,11 +399,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
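
Both this mempcpy hunk and the memcpy hunk above carry the lvsl-to-lvsr and vperm operand swap for little-endian. The underlying technique merges two aligned vector loads so that the bytes beginning at the true, unaligned source address land contiguously in one register. A 64-bit C analogue of the merge, assuming big-endian shift direction; little-endian swaps the two shifts, which is what the lvsr and reversed vperm operands accomplish:

  #include <stdint.h>

  /* lo holds the aligned doubleword covering the unaligned address,
     hi the next one; misalign is the byte offset within lo.  */
  static uint64_t
  merge_unaligned (uint64_t lo, uint64_t hi, unsigned int misalign)
  {
    unsigned int s = 8 * misalign;
    if (s == 0)
      return lo;                          /* already aligned */
    return (lo << s) | (hi >> (64 - s));  /* vperm-style byte merge */
  }
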
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
index c49995210..a9e86cb19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
@@ -23,118 +23,132 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrdi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
li r6,-8
- addi r9,r3,-1
- clrrdi r8,r9,3
- addi r8,r8,8
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,26,28 /* Calculate padding. */
-
+ clrrdi r8,r7,3
+ srd r9,r9,r0
cmpldi r5,32
+ clrrdi r0,r10,3
ble L(small_range)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-8
- cmpld cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
- /* Are we now aligned to a doubleword boundary? If so, skip to
+ /* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 28,L(loop_setup)
+ bf 28,L(loop_setup)
/* Handle DWORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,r8,r6
+#else
ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already. */
+#endif
addi r8,r8,-8
- cmpld cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-16
- sub r5,r8,r7
- srdi r9,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the first byte of the string, ie. the dword at
+ s & ~7, or r0. The first dword read is at r8 - 8, we
+ read 2 * cnt dwords, so the last dword read will be at
+ r8 - 8 - 16 * cnt + 8. Solving for cnt gives
+ cnt = (r8 - r0) / 16 */
+ sub r5,r8,r0
+ addi r8,r8,-8
+ srdi r9,r5,4 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- ldbrx r12,r8,r6
- ldbrx r11,r8,r0
- addi r8,r8,-8
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+ ldx r11,r8,r6
+#else
+ ldbrx r12,0,r8
+ ldbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
+ or r5,r9,r3 /* Merge everything in one doubleword. */
cmpldi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-8
+ addi r8,r8,-16
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,8
- cmpld cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
-
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+
+ /* We may have one more word to read. */
+ cmpld r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmpldi cr6,r10,0
- addi r8,r8,8
+ /* OK, one (or both) of the dwords contains BYTE. Check
+ the first dword. */
+ cmpldi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-8
- /* r10 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the BYTE in the original
+ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzd r9,r3 /* Count leading zeros before the match. */
+ cmpld r8,r0 /* Are we on the last word? */
+ srdi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-7
sub r3,r8,r0
- cmpld r3,r7
- blt L(null)
+ cmpld cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -148,29 +162,35 @@ L(small_range):
cmpldi r5,0
beq L(null)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
bne cr7,L(done)
/* Are we done already? */
+ cmpld r8,r0
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpld r8,r0
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
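
Everything in this routine keys off cmpb, which yields 0xff in each byte position where its two operands agree and 0x00 elsewhere; memrchr then takes cntlzd of that mask because, after the ldbrx (or plain ld on little-endian) byte reversal, the match nearest the end of memory sits in the most significant byte. A portable emulation of cmpb, written as an exact byte loop rather than a bit trick (a sketch):

  #include <stdint.h>

  static uint64_t
  cmpb_emulate (uint64_t a, uint64_t b)
  {
    uint64_t r = 0;
    for (int i = 0; i < 64; i += 8)
      if ((((a ^ b) >> i) & 0xff) == 0)   /* bytes equal?  */
        r |= 0xffULL << i;                /* mark with 0xff */
    return r;
  }
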
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memset.S b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
index b24cfa163..8b081e87c 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -32,8 +32,8 @@ L(_memset):
mr 10,3
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
neg 0,3
@@ -321,7 +321,7 @@ L(medium):
clrldi 0,0,62
beq L(medium_aligned)
- /* Force 4-bytes alignment for SRC. */
+ /* Force 4-bytes alignment for DST. */
mtocrf 0x01,0
subf 5,0,5
1: /* Copy 1 byte. */
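
The rlwimi-to-insrdi change here (and in the str* patches below) is the byte-replication step: each insert doubles the number of copies of the byte. In C the same replication is a chain of shift-or steps; a sketch (memset's hunk shows the first two steps, the string routines add the third to fill a full doubleword):

  #include <stdint.h>

  static uint64_t
  replicate_byte (uint8_t c)
  {
    uint64_t v = c;
    v |= v << 8;    /* insrdi 4,4,8,48  -> 2 copies */
    v |= v << 16;   /* insrdi 4,4,16,32 -> 4 copies */
    v |= v << 32;   /* insrdi 4,4,32,0  -> 8 copies */
    return v;
  }
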
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
index 50a33d8fa..547aed771 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
@@ -27,8 +27,8 @@ ENTRY (__rawmemchr)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes. */
@@ -36,8 +36,13 @@ ENTRY (__rawmemchr)
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6 /* Move left to discard ignored bits. */
srd r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -91,8 +96,14 @@ L(loop):
doubleword from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
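
The little-endian L(done) path needs a count of trailing zeros, since the first match in string order is now the least significant 0xff byte, but POWER7 has no cnttzd; the addi/andc/popcntd triple synthesizes one. The identity, sketched in C (the loop stands in for popcntd):

  #include <stdint.h>

  static unsigned int
  trailing_zeros (uint64_t x)
  {
    uint64_t below = (x - 1) & ~x;  /* ones exactly below the lowest set bit */
    unsigned int n = 0;
    while (below)                   /* popcntd r0,r0 */
      {
        n++;
        below &= below - 1;
      }
    return n;                       /* 64 when x == 0 */
  }
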
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
index 3ffe7a188..4679a158f 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes and r0 has
@@ -47,11 +47,17 @@ ENTRY (strchr)
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r11,r11,r6
+ sld r10,r10,r6
+ sld r11,r11,r6
+#else
sld r10,r10,r6
sld r11,r11,r6
srd r10,r10,r6
srd r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -108,15 +114,24 @@ L(loop):
mr r11,r7
addi r8,r8,8
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
- cntlzd r4,r10 /* Count leading zeroes before c matches. */
- cntlzd r0,r11 /* Count leading zeroes before null matches. */
- cmpld cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntd r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmpld cr7,r3,r4
bgt cr7,L(no_match)
- srdi r0,r4,3 /* Convert leading zeroes to bytes. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before c matches. */
+ cmpld cr7,r11,r10
+ bgt cr7,L(no_match)
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -135,9 +150,13 @@ L(null_match):
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6
srd r5,r5,r6
+#endif
cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -192,7 +211,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
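
On little-endian, strchr decides whether c or the terminating null comes first by comparing the two below-the-lowest-match masks: the smaller mask belongs to the earlier byte. A sketch of that decision with assumed names, using __builtin_popcountll (a GCC builtin) in place of popcntd:

  #include <stdint.h>

  /* c_mask / nul_mask are cmpb results; returns the byte index of c
     within the doubleword, or -1 when the null terminator is first.  */
  static int
  first_c_before_null (uint64_t c_mask, uint64_t nul_mask)
  {
    uint64_t below_c = (c_mask - 1) & ~c_mask;
    uint64_t below_n = (nul_mask - 1) & ~nul_mask;
    if (below_c > below_n)                      /* bgt cr7,L(no_match) */
      return -1;
    return __builtin_popcountll (below_c) / 8;  /* popcntd; srdi r0,r0,3 */
  }
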
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
index 9dbc51b0d..df457525e 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
rlwinm r6,r3,3,26,28 /* Calculate padding. */
@@ -44,10 +44,17 @@ ENTRY (__strchrnul)
/* Move the doublewords left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r9,r9,r6
+ sld r10,r10,r6
+ sld r9,r9,r6
+#else
sld r10,r10,r6
sld r9,r9,r6
srd r10,r10,r6
srd r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -97,7 +104,7 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
+ address again and move the result of cmpb to r5 so we can calculate
the pointer. */
mr r5,r10
addi r8,r8,8
@@ -106,7 +113,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
index 343216952..807ef1082 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
@@ -30,7 +30,11 @@ ENTRY (strlen)
with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
ld r12,0(r4) /* Load doubleword from memory. */
+#ifdef __LITTLE_ENDIAN__
+ sld r5,r5,r6
+#else
srd r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -48,9 +52,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpdi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -87,9 +88,15 @@ L(loop):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntd r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 77ecad5ab..e618b010b 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -27,7 +27,7 @@
EALIGN (strncmp,5,0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -40,6 +40,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -83,12 +84,57 @@ L(g1): add rTMP,rFEFE,rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzd rBITDIF,rBITDIF
@@ -97,7 +143,7 @@ L(endstring):
cmpd cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
blt cr1,L(equal)
- sradi rRTN,rRTN,63
+ sradi rRTN,rRTN,63 /* must return an int. */
ori rRTN,rRTN,1
blr
L(equal):
@@ -105,7 +151,7 @@ L(equal):
blr
L(different):
- ldu rWORD1,-8(rSTR1)
+ ld rWORD1,-8(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
blt L(highbit)
@@ -113,11 +159,10 @@ L(different):
ori rRTN,rRTN,1
blr
L(highbit):
- srdi rWORD2,rWORD2,56
- srdi rWORD1,rWORD1,56
- sub rRTN,rWORD1,rWORD2
+ sradi rRTN,rWORD2,63
+ ori rRTN,rRTN,1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
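
The new little-endian L(endstring)/L(different) code turns the cmpb equal-byte mask into a mask that keeps only the least significant differing byte and the equal bytes below it; after masking, a single unsigned doubleword comparison has the sign of the byte difference. A self-contained C sketch (the asm instead does a signed subtract plus the L(highbit) fixup, which amounts to the same unsigned result):

  #include <stdint.h>

  static uint64_t
  equal_bytes (uint64_t a, uint64_t b)     /* emulates cmpb */
  {
    uint64_t r = 0;
    for (int i = 0; i < 64; i += 8)
      if ((((a ^ b) >> i) & 0xff) == 0)
        r |= 0xffULL << i;
    return r;
  }

  static int
  diff_sign_le (uint64_t w1, uint64_t w2)  /* requires w1 != w2 */
  {
    uint64_t eq = equal_bytes (w1, w2);
    /* (eq + 1) | ~eq is all-ones from the lowest 0x00 byte upward;
       shifting left one byte and complementing keeps the least
       significant differing byte and the equal bytes below it.  */
    uint64_t keep = ~(((eq + 1) | ~eq) << 8);
    return (w1 & keep) < (w2 & keep) ? -1 : 1;
  }
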
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
index 37c7dbfe8..51591069d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
@@ -24,33 +24,29 @@
ENTRY (__strnlen)
CALL_MCOUNT 2
dcbt 0,r3
- clrrdi r8,r3,3
+ clrrdi r8,r3,3
add r7,r3,r4 /* Calculate the last acceptable address. */
cmpldi r4,32
li r0,0 /* Doubleword with null chars. */
+ addi r7,r7,-1
+
/* If we have less than 33 bytes to search, skip to a faster code. */
ble L(small_range)
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrdi r7,r7,3 /* Address of last doubleword. */
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
@@ -63,17 +59,18 @@ L(proceed):
cmpldi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r5,r7,r8
srdi r6,r5,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -89,15 +86,18 @@ L(loop):
cmpldi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more dword to read. */
+ cmpld cr6,r8,r7
+ beq cr6,L(end_max)
+
+ ldu r12,8(r8)
+ cmpb r10,r12,r0
+ cmpldi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the doublewords contains a null byte. Check
@@ -119,52 +119,59 @@ L(found):
/* r10 has the output of the cmpb instruction, that is, it contains
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length.
- We need to make sure the null char is *before* the start of the
- range (since we're going backwards). */
+ We need to make sure the null char is *before* the end of the
+ range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmpld r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntd r0,r0
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmpld r3,r4
+ blelr
+ mr r3,r4
blr
/* Deals with size <= 32. */
.align 4
L(small_range):
cmpldi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrdi r7,r7,3 /* Address of last doubleword. */
rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load word from memory. */
+ ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,8
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
ldu r12,8(r8)
cmpb r10,r12,r0
- addi r9,r8,8
cmpldi cr6,r10,0
bne cr6,L(done)
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
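
The loop-count derivation quoted in the L(loop_setup) comment above checks out with concrete numbers: take r8 = 0x1000 and r7 = 0x1040, so cnt = (0x1040 - 0x1000) / 16 = 4; the four iterations then read the doubleword pairs at 0x1008/0x1010, 0x1018/0x1020, 0x1028/0x1030 and 0x1038/0x1040, and the final read is exactly the doubleword at r7, the one containing the last byte of the string.
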
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
index 58ec61062..1829b9ab6 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
mfcr r0
std r16,((JB_GPRS+2)*8)(3)
stfd fp16,((JB_FPRS+2)*8)(3)
- std r0,(JB_CR*8)(3)
+ stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */
std r17,((JB_GPRS+3)*8)(3)
stfd fp17,((JB_FPRS+3)*8)(3)
std r18,((JB_GPRS+4)*8)(3)
@@ -139,50 +139,46 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
mfspr r0,VRSAVE
- stw r0,((JB_VRSAVE)*8)(3)
+ stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
- on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area
- below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area
- above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
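
The rewritten misaligned VMX save streams aligned stvx stores into an unaligned save area: each stored quadword is a vperm merge of the previous and current register, and as the comment above says, the data just below v20 and just above v31 is rotated first so that the single save_misaligned_vmx macro also covers the boundaries. A 64-bit big-endian C analogue of the whole scheme (a sketch; the real code permutes 16-byte vectors):

  #include <stdint.h>
  #include <stddef.h>

  /* Store vals[0..n-1] at byte offset off (1..7) inside base[0..n],
     preserving the bytes below and above the misaligned area.  */
  static void
  store_misaligned (uint64_t *base, unsigned int off,
                    const uint64_t *vals, size_t n)
  {
    unsigned int s = 8 * off;
    uint64_t prev = base[0] >> (64 - s);  /* rotate data below (lvx+vperm) */
    for (size_t i = 0; i < n; i++)
      {
        base[i] = (prev << (64 - s)) | (vals[i] >> s);  /* vperm + stvx */
        prev = vals[i];
      }
    base[n] = (prev << (64 - s)) | ((base[n] << s) >> s);  /* data above */
  }
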
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp.S b/libc/sysdeps/powerpc/powerpc64/setjmp.S
index 667b9d12d..0a3b2fc02 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp.S
@@ -26,9 +26,9 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmxsetjmp, setjmp, GLIBC_2.3.4)
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define setjmp __vmxsetjmp
# define _setjmp __vmx_setjmp
# define __sigsetjmp __vmx__sigsetjmp
@@ -44,9 +44,9 @@ strong_alias (__vmx__sigsetjmp, __setjmp)
# undef __sigjmp_save
# undef JB_SIZE
# define __NO_VMX__
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3);
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.3)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3);
+compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3)
# define setjmp __novmxsetjmp
# define _setjmp __novmx_setjmp
# define __sigsetjmp __novmx__sigsetjmp
diff --git a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
index 9da879c61..e80a683e6 100644
--- a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ld %0,%1(13)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
diff --git a/libc/sysdeps/powerpc/powerpc64/stpcpy.S b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
index 070cd4662..c0b39729e 100644
--- a/libc/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strchr.S b/libc/sysdeps/powerpc/powerpc64/strchr.S
index d2d8cd361..da707ae58 100644
--- a/libc/sysdeps/powerpc/powerpc64/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/strchr.S
@@ -37,11 +37,13 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
dcbt 0,rRTN
- rlwimi rCHR, rCHR, 8, 16, 23
+ insrdi rCHR, rCHR, 8, 48
li rMASK, -1
- rlwimi rCHR, rCHR, 16, 0, 15
+ insrdi rCHR, rCHR, 16, 32
rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
@@ -54,64 +56,93 @@ ENTRY (strchr)
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
ld rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rIGN
+#else
srd rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):ldu rWORD, 8(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ ldu rWORD, 8(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but there may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+   But there may be false matches in the byte next most significant to
+   a true match, due to carries.  This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmpld rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 64-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzd rCLZB, rTMP2
cmpld rWORD, rTMP2
bgtlr
- cntlzd rCLZB, rTMP2
+#endif
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 64-7-64
+ sradi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 8
srdi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
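
[Editor's note: a rough C model, not part of the patch, of the word-at-a-time
tests used above.  rFEFE holds the negation of the ones constant and r7F7F is
the complement of the high-bits constant, so the add/nor/and. sequence
computes the classic zero-byte detect; the little-endian tail then locates
the lowest 0x80 with a trailing-zeros count, which is why LE can tolerate the
carry-induced false matches that force big-endian to recompute exactly.
__builtin_ctzll is the GCC builtin.]

  #include <stdint.h>

  #define ONES  0x0101010101010101ULL   /* rFEFE == -ONES */
  #define HIGHS 0x8080808080808080ULL   /* r7F7F == ~HIGHS */

  /* 0x80 in each byte of x that is 0x00, plus possible carry noise in
     the byte above a true hit (harmless on little-endian).  */
  static inline uint64_t
  zero_bytes (uint64_t x)
  {
    return (x - ONES) & ~x & HIGHS;
  }

  /* 0x80 in each byte of x equal to c; c8 is the insrdi-built value
     with the search character replicated into every byte.  */
  static inline uint64_t
  match_bytes (uint64_t x, uint64_t c8)
  {
    uint64_t t = x ^ c8;
    return (t - ONES) & ~t & HIGHS;
  }

  /* Little-endian: the least significant 0x80 is the first hit in
     string order; the asm derives it via cntlzd ((t - 1) & ~t).  */
  static inline int
  first_hit_index_le (uint64_t t)
  {
    return __builtin_ctzll (t) >> 3;
  }
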
diff --git a/libc/sysdeps/powerpc/powerpc64/strcmp.S b/libc/sysdeps/powerpc/powerpc64/strcmp.S
index c9d6dac12..70854689d 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcmp.S
@@ -25,7 +25,7 @@
EALIGN (strcmp, 4, 0)
CALL_MCOUNT 2
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -58,19 +59,66 @@ L(g0): ldu rWORD1, 8(rSTR1)
ldu rWORD2, 8(rSTR2)
L(g1): add rTMP, rFEFE, rWORD1
nor rNEG, r7F7F, rWORD1
-
and. rTMP, rTMP, rNEG
cmpd cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -79,7 +127,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -95,11 +143,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
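
[Editor's note: a rough C sketch, not part of the patch, of the new
little-endian L(endstring) path above.  't' is the zero-byte detect value for
the word from s1; the (t - 1) & ~t / rldimi pair builds a mask covering every
byte up to and including the first NUL, so gunk beyond the end of the string
cannot make equal strings compare unequal.]

  #include <stdint.h>

  static int
  endstring_cmp_le (uint64_t w1, uint64_t w2, uint64_t t)
  {
    uint64_t mask = (t - 1) & ~t;  /* bits below the NUL's 0x80 bit */
    mask |= mask << 1;             /* the rldimi: extend over that bit */
    w1 &= mask;                    /* mask off gunk past the NUL */
    w2 &= mask;
    if (w1 == w2)
      return 0;
    /* Find the least significant differing byte and order by it.  */
    uint64_t diff = w1 ^ w2;
    int k = __builtin_ctzll (diff) >> 3;
    unsigned b1 = (w1 >> (8 * k)) & 0xff;
    unsigned b2 = (w2 >> (8 * k)) & 0xff;
    return b1 < b2 ? -1 : 1;
  }
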
diff --git a/libc/sysdeps/powerpc/powerpc64/strcpy.S b/libc/sysdeps/powerpc/powerpc64/strcpy.S
index 4c6fd3f9d..a7fd85bad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcpy.S
@@ -68,6 +68,32 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
L(g1):
+#ifdef __LITTLE_ENDIAN__
+ extrdi. rTMP, rALT, 8, 56
+ stb rALT, 8(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 48
+ stb rTMP, 9(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 40
+ stb rTMP, 10(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 32
+ stb rTMP, 11(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 24
+ stb rTMP, 12(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 16
+ stb rTMP, 13(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 8
+ stb rTMP, 14(rDEST)
+ beqlr-
+ extrdi rTMP, rALT, 8, 0
+ stb rTMP, 15(rDEST)
+ blr
+#else
extrdi. rTMP, rALT, 8, 0
stb rTMP, 8(rDEST)
beqlr-
@@ -91,6 +117,7 @@ L(g1):
beqlr-
stb rALT, 15(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strlen.S b/libc/sysdeps/powerpc/powerpc64/strlen.S
index 0f9b5eea9..4ed1ba3ad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly also a false 0x80 in the byte next more
+ significant to a true match, due to carries. For little-endian this
+ is of no consequence, since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -62,7 +67,7 @@
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
- effective since most strings will be shorter than the cache line.*/
+ effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@@ -78,7 +83,7 @@
ENTRY (strlen)
CALL_MCOUNT 1
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -88,9 +93,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string doubleword */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
dcbt 0,rRTN
clrrdi rSTR, rRTN, 3
@@ -100,30 +105,36 @@ ENTRY (strlen)
addi r7F7F, r7F7F, 0x7f7f
li rMASK, -1
insrdi r7F7F, r7F7F, 32, 0
-/* That's the setup done, now do the first pair of doublewords.
- We make an exception and use method (2) on the first two doublewords,
- to reduce overhead. */
+/* We use method (2) on the first two doublewords, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rPADN
+#else
srd rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
- sldi rTMP1, rFEFE, 32
- add rFEFE, rFEFE, rTMP1
+ sldi rTMP1, rFEFE, 32
+ add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
bt 28, L(loop)
/* Handle second doubleword of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -137,28 +148,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first doubleword in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00
+ otherwise. */
L(done0):
- cntlzd rTMP3, rWORD1
+ cntlzd rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzd rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 64-7
+ srdi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzd rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 64-7-64
+ sradi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
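
[Editor's note: the two methods described in the strlen.S comments above,
rendered as rough C; not part of the patch.  Method (1) is cheaper in the
loop but can leave a false 0x80 in the byte above a true zero because of
carries; method (2) is exact, which is why the big-endian tail recomputes
with it before counting leading zeros, while the little-endian tail simply
counts trailing zeros of the method (1) value.]

  #include <stdint.h>

  #define SEVENF 0x7f7f7f7f7f7f7f7fULL
  #define FEFE   0xfefefefefefefeffULL   /* == -0x0101010101010101 */

  /* Method (1): nonzero iff x contains a 0x00 byte; may carry a false
     0x80 into the byte above a true hit.  */
  static inline uint64_t
  method1 (uint64_t x)
  {
    return (x + FEFE) & ~(x | SEVENF);
  }

  /* Method (2): exactly 0x80 in each byte of x that is 0x00.  */
  static inline uint64_t
  method2 (uint64_t x)
  {
    return ~(((x & SEVENF) + SEVENF) | x | SEVENF);
  }
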
diff --git a/libc/sysdeps/powerpc/powerpc64/strncmp.S b/libc/sysdeps/powerpc/powerpc64/strncmp.S
index 779d9f7f6..8f842c4b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -36,6 +36,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -77,12 +78,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -91,7 +139,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -99,7 +147,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -107,11 +155,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
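
[Editor's note: a rough C sketch, not part of the patch, of the new
L(highbit) return sequence used by both strcmp and strncmp above.  Once the
words have been shifted so the differing byte is most significant, the sign
bit of rWORD2 alone decides the unsigned order, so the code materializes
-1 or +1 from it directly.]

  #include <stdint.h>

  static int
  highbit_result (uint64_t w2)
  {
    /* sradi rRTN, rWORD2, 63: arithmetic shift gives 0 or -1;
       ori rRTN, rRTN, 1 then yields +1 or -1.  */
    return (int) (((int64_t) w2 >> 63) | 1);
  }
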
diff --git a/libc/sysdeps/powerpc/preconfigure b/libc/sysdeps/powerpc/preconfigure
new file mode 100644
index 000000000..1741c251f
--- /dev/null
+++ b/libc/sysdeps/powerpc/preconfigure
@@ -0,0 +1,11 @@
+# Check for e500.
+
+case "$machine" in
+powerpc)
+ $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null > conftest.i
+ if grep -q __NO_FPRS__ conftest.i && ! grep -q _SOFT_FLOAT conftest.i; then
+ base_machine=powerpc machine=powerpc/powerpc32/e500
+ fi
+ rm -f conftest.i
+ ;;
+esac
diff --git a/libc/sysdeps/powerpc/soft-fp/sfp-machine.h b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
new file mode 100644
index 000000000..041187807
--- /dev/null
+++ b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
@@ -0,0 +1,115 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the SPEFSCR. */
+
+# include <fenv_libc.h>
+# include <sysdep.h>
+# include <sys/prctl.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+# define FP_EX_INEXACT SPEFSCR_FINXS
+# define FP_EX_INVALID SPEFSCR_FINVS
+# define FP_EX_DIVZERO SPEFSCR_FDBZS
+# define FP_EX_UNDERFLOW SPEFSCR_FUNFS
+# define FP_EX_OVERFLOW SPEFSCR_FOVFS
+
+# define _FP_DECL_EX \
+ int _spefscr __attribute__ ((unused)), _ftrapex __attribute__ ((unused)) = 0
+# define FP_INIT_ROUNDMODE \
+ do \
+ { \
+ int _r; \
+ INTERNAL_SYSCALL_DECL (_err); \
+ \
+ _spefscr = fegetenv_register (); \
+ _r = INTERNAL_SYSCALL (prctl, _err, 2, PR_GET_FPEXC, &_ftrapex); \
+ if (INTERNAL_SYSCALL_ERROR_P (_r, _err)) \
+ _ftrapex = 0; \
+ } \
+ while (0)
+# define FP_INIT_EXCEPTIONS /* Empty. */
+
+# define FP_HANDLE_EXCEPTIONS __feraiseexcept_soft (_fex)
+# define FP_ROUNDMODE (_spefscr & 0x3)
+
+/* Not correct in general, but sufficient for the uses in soft-fp. */
+# define FP_TRAPPING_EXCEPTIONS (_ftrapex & PR_FP_EXC_UND \
+ ? FP_EX_UNDERFLOW \
+ : 0)
+
+#else
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the FPSCR, which also correspond to the FE_* bits. This makes
+ everything easier ;-). */
+# define FP_EX_INVALID (1 << (31 - 2))
+# define FP_EX_OVERFLOW (1 << (31 - 3))
+# define FP_EX_UNDERFLOW (1 << (31 - 4))
+# define FP_EX_DIVZERO (1 << (31 - 5))
+# define FP_EX_INEXACT (1 << (31 - 6))
+
+# define FP_HANDLE_EXCEPTIONS __simulate_exceptions (_fex)
+# define FP_ROUNDMODE __sim_round_mode
+# define FP_TRAPPING_EXCEPTIONS (~__sim_disabled_exceptions & 0x3e000000)
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
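
[Editor's note: a rough C rendering, not part of the patch, of the
_FP_CHOOSENAN rule defined above, with a hypothetical helper name.  Given two
NaN inputs, soft-fp keeps X's sign and fraction unless X is quiet while Y is
not, in which case Y propagates; the result is then classed as a NaN.]

  #include <stdint.h>

  typedef struct { int sign; uint64_t frac; } nan_rep;

  /* Hypothetical model of _FP_CHOOSENAN; 'qnanbit' stands in for the
     format's _FP_QNANBIT_* constant.  */
  static nan_rep
  choose_nan (nan_rep x, nan_rep y, uint64_t qnanbit)
  {
    if ((x.frac & qnanbit) && !(y.frac & qnanbit))
      return y;   /* X quiet, Y signaling: propagate Y.  */
    return x;     /* Otherwise keep X.  */
  }
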
diff --git a/libc/sysdeps/powerpc/sysdep.h b/libc/sysdeps/powerpc/sysdep.h
index 1b5334ad3..bc2cb6681 100644
--- a/libc/sysdeps/powerpc/sysdep.h
+++ b/libc/sysdeps/powerpc/sysdep.h
@@ -144,6 +144,21 @@
#define VRSAVE 256
+/* The 32-bit words of a 64-bit dword are at these offsets in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define LOWORD 0
+# define HIWORD 4
+#else
+# define LOWORD 4
+# define HIWORD 0
+#endif
+
+/* The high 16-bit word of a 64-bit dword is at this offset in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define HISHORT 6
+#else
+# define HISHORT 0
+#endif
/* This seems to always be the case on PPC. */
#define ALIGNARG(log2) log2