Diffstat (limited to 'libc/sysdeps/powerpc/powerpc64/power4')
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c      | 182
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c  |   2
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c  |   4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c   |   2
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c  |   2
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcmp.S       |  28
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcpy.S       |  20
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memset.S       |   3
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/strncmp.S      |   4
9 files changed, 91 insertions, 156 deletions
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
index d15680e77..9fcaa763c 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
+++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
@@ -2,7 +2,7 @@
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
- * Copyright (C) 2001, 2006 Free Software Foundation
+ * Copyright (C) 2001-2013 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -23,9 +23,7 @@
/* FUNCTIONS: */
/* mcr */
/* acr */
-/* cr */
/* cpy */
-/* cpymn */
/* norm */
/* denorm */
/* mp_dbl */
@@ -46,11 +44,13 @@
#include "endian.h"
#include "mpa.h"
#include "mpa2.h"
-#include <sys/param.h> /* For MIN() */
-/* mcr() compares the sizes of the mantissas of two multiple precision */
-/* numbers. Mantissas are compared regardless of the signs of the */
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
-/* disregarded. */
+#include <sys/param.h>
+
+const mp_no mpone = {1, {1.0, 1.0}};
+const mp_no mptwo = {1, {1.0, 2.0}};
+
+/* Compare mantissa of two multiple precision numbers regardless of the sign
+ and exponent of the numbers. */
static int mcr(const mp_no *x, const mp_no *y, int p) {
long i;
long p2 = p;
@@ -61,9 +61,7 @@ static int mcr(const mp_no *x, const mp_no *y, int p) {
return 0;
}
-
-
-/* acr() compares the absolute values of two multiple precision numbers */
+/* Compare the absolute values of two multiple precision numbers. */
int __acr(const mp_no *x, const mp_no *y, int p) {
long i;
@@ -81,21 +79,8 @@ int __acr(const mp_no *x, const mp_no *y, int p) {
return i;
}
-
-/* cr90 compares the values of two multiple precision numbers */
-int __cr(const mp_no *x, const mp_no *y, int p) {
- int i;
-
- if (X[0] > Y[0]) i= 1;
- else if (X[0] < Y[0]) i=-1;
- else if (X[0] < ZERO ) i= __acr(y,x,p);
- else i= __acr(x,y,p);
-
- return i;
-}
-
-
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
+/* Copy multiple precision number X into Y. They could be the same
+ number. */
void __cpy(const mp_no *x, mp_no *y, int p) {
long i;
@@ -105,35 +90,12 @@ void __cpy(const mp_no *x, mp_no *y, int p) {
return;
}
-
-/* Copy a multiple precision number x of precision m into a */
-/* multiple precision number y of precision n. In case n>m, */
-/* the digits of y beyond the m'th are set to zero. In case */
-/* n<m, the digits of x beyond the n'th are ignored. */
-/* x=y is permissible. */
-
-void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
-
- long i,k;
- long n2 = n;
- long m2 = m;
-
- EY = EX; k=MIN(m2,n2);
- for (i=0; i <= k; i++) Y[i] = X[i];
- for ( ; i <= n2; i++) Y[i] = ZERO;
-
- return;
-}
-
-/* Convert a multiple precision number *x into a double precision */
-/* number *y, normalized case (|x| >= 2**(-1022))) */
+/* Convert a multiple precision number *X into a double precision
+ number *Y, normalized case (|x| >= 2**(-1022))). */
static void norm(const mp_no *x, double *y, int p)
{
- #define R radixi.d
+ #define R RADIXI
long i;
-#if 0
- int k;
-#endif
double a,c,u,v,z[5];
if (p<5) {
if (p==1) c = X[1];
@@ -180,18 +142,15 @@ static void norm(const mp_no *x, double *y, int p)
#undef R
}
-/* Convert a multiple precision number *x into a double precision */
-/* number *y, denormalized case (|x| < 2**(-1022))) */
+/* Convert a multiple precision number *X into a double precision
+ number *Y, Denormal case (|x| < 2**(-1022))). */
static void denorm(const mp_no *x, double *y, int p)
{
long i,k;
long p2 = p;
double c,u,z[5];
-#if 0
- double a,v;
-#endif
-#define R radixi.d
+#define R RADIXI
if (EX<-44 || (EX==-44 && X[1]<TWO5))
{ *y=ZERO; return; }
@@ -230,14 +189,9 @@ static void denorm(const mp_no *x, double *y, int p)
#undef R
}
-/* Convert a multiple precision number *x into a double precision number *y. */
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */
-
+/* Convert multiple precision number *X into double precision number *Y. The
+ result is correctly rounded to the nearest/even. */
void __mp_dbl(const mp_no *x, double *y, int p) {
-#if 0
- int i,k;
- double a,c,u,v,z[5];
-#endif
if (X[0] == ZERO) {*y = ZERO; return; }
@@ -246,27 +200,24 @@ void __mp_dbl(const mp_no *x, double *y, int p) {
else denorm(x,y,p);
}
-
-/* dbl_mp() converts a double precision number x into a multiple precision */
-/* number *y. If the precision p is too small the result is truncated. x is */
-/* left unchanged. */
-
+/* Get the multiple precision equivalent of X into *Y. If the precision is too
+ small, the result is truncated. */
void __dbl_mp(double x, mp_no *y, int p) {
long i,n;
long p2 = p;
double u;
- /* Sign */
+ /* Sign. */
if (x == ZERO) {Y[0] = ZERO; return; }
else if (x > ZERO) Y[0] = ONE;
else {Y[0] = MONE; x=-x; }
- /* Exponent */
+ /* Exponent. */
for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
for ( ; x < ONE; EY -= ONE) x *= RADIX;
- /* Digits */
+ /* Digits. */
n=MIN(p2,4);
for (i=1; i<=n; i++) {
u = (x + TWO52) - TWO52;
@@ -276,13 +227,10 @@ void __dbl_mp(double x, mp_no *y, int p) {
return;
}
-
-/* add_magnitudes() adds the magnitudes of *x & *y assuming that */
-/* abs(*x) >= abs(*y) > 0. */
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
-/* No guard digit is used. The result equals the exact sum, truncated. */
-/* *x & *y are left unchanged. */
-
+/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
+ sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
+ Y and Z. No guard digit is used. The result equals the exact sum,
+ truncated. */
static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
long i,j,k;
@@ -319,13 +267,10 @@ static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
else EZ += ONE;
}
-
-/* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
-/* abs(*x) > abs(*y) > 0. */
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z */
-/* or y&z. One guard digit is used. The error is less than one ulp. */
-/* *x & *y are left unchanged. */
-
+/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
+ The sign of the difference *Z is not changed. X and Y may overlap but not X
+ and Z or Y and Z. One guard digit is used. The error is less than one
+ ULP. */
static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
long i,j,k;
@@ -378,11 +323,9 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
return;
}
-
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
-/* but not x&z or y&z. One guard digit is used. The error is less than */
-/* one ulp. *x & *y are left unchanged. */
-
+/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
+ and Z or Y and Z. One guard digit is used. The error is less than one
+ ULP. */
void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
int n;
@@ -402,11 +345,9 @@ void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
return;
}
-
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
-/* overlap but not x&z or y&z. One guard digit is used. The error is */
-/* less than one ulp. *x & *y are left unchanged. */
-
+/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
+ not X and Z or Y and Z. One guard digit is used. The error is less than
+ one ULP. */
void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
int n;
@@ -426,12 +367,9 @@ void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
return;
}
-
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
-/* *x & *y are left unchanged. */
-
+/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
+ and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
+ digits. In case P > 3 the error is bounded by 1.001 ULP. */
void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
long i, i1, i2, j, k, k2;
@@ -449,19 +387,19 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
if (k > p2) {i1=k-p2; i2=p2+1; }
else {i1=1; i2=k; }
#if 1
- /* rearange this inner loop to allow the fmadd instructions to be
+ /* Rearrange this inner loop to allow the fmadd instructions to be
independent and execute in parallel on processors that have
- dual symetrical FP pipelines. */
+ dual symmetrical FP pipelines. */
if (i1 < (i2-1))
{
- /* make sure we have at least 2 iterations */
+ /* Make sure we have at least 2 iterations. */
if (((i2 - i1) & 1L) == 1L)
{
/* Handle the odd iterations case. */
zk2 = x->d[i2-1]*y->d[i1];
}
else
- zk2 = zero.d;
+ zk2 = 0.0;
/* Do two multiply/adds per loop iteration, using independent
accumulators; zk and zk2. */
for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
@@ -469,7 +407,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
zk += x->d[i]*y->d[j];
zk2 += x->d[i+1]*y->d[j-1];
}
- zk += zk2; /* final sum. */
+ zk += zk2; /* Final sum. */
}
else
{
@@ -477,7 +415,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
zk += x->d[i1]*y->d[i1];
}
#else
- /* The orginal code. */
+ /* The original code. */
for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
#endif
@@ -489,7 +427,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
}
Z[k] = zk;
- /* Is there a carry beyond the most significant digit? */
+ /* Is there a carry beyond the most significant digit? */
if (Z[1] == ZERO) {
for (i=1; i<=p2; i++) Z[i]=Z[i+1];
EZ = EX + EY - 1; }
@@ -500,17 +438,14 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
return;
}
+/* Invert *X and store in *Y. Relative error bound:
+ - For P = 2: 1.001 * R ^ (1 - P)
+ - For P = 3: 1.063 * R ^ (1 - P)
+ - For P > 3: 2.001 * R ^ (1 - P)
-/* Invert a multiple precision number. Set *y = 1 / *x. */
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
-/* 2.001*r**(1-p) for p>3. */
-/* *x=0 is not permissible. *x is left unchanged. */
-
+ *X = 0 is not permissible. */
void __inv(const mp_no *x, mp_no *y, int p) {
long i;
-#if 0
- int l;
-#endif
double t;
mp_no z,w;
static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
@@ -532,12 +467,13 @@ void __inv(const mp_no *x, mp_no *y, int p) {
return;
}
+/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
+ or Y and Z. Relative error bound:
+ - For P = 2: 2.001 * R ^ (1 - P)
+ - For P = 3: 2.063 * R ^ (1 - P)
+ - For P > 3: 3.001 * R ^ (1 - P)
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
-/* are left unchanged. x&y may overlap but not x&z or y&z. */
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
-
+ *X = 0 is not permissible. */
void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
mp_no w;
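
Note: the comment kept in the __mul hunk above describes splitting the inner product across two independent accumulators so consecutive fmadd operations can issue in parallel on POWER4's dual symmetrical FP pipelines. The following is a minimal standalone C sketch of that pattern, using plain double arrays instead of the mp_no digit vectors; the function name and array layout are illustrative assumptions, not part of the patch.

/* Sum x[i] * y[j] over the pairs with i + j == i1 + i2 - 1, for i in
   [i1, i2).  Two accumulators keep consecutive multiply-adds independent.  */
static double
dot_two_accumulators (const double *x, const double *y, long i1, long i2)
{
  double zk = 0.0, zk2 = 0.0;
  long i, j;

  /* With an odd number of terms, peel the last one so the main loop can
     advance by two each iteration.  */
  if (((i2 - i1) & 1L) == 1L)
    zk2 = x[i2 - 1] * y[i1];

  /* Two multiply/adds per iteration on independent accumulators.  */
  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
    {
      zk  += x[i] * y[j];
      zk2 += x[i + 1] * y[j - 1];
    }

  return zk + zk2;	/* Final sum.  */
}
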
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
index 2db82810e..d93f50544 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
+++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c
@@ -1,7 +1,7 @@
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
- * Copyright (C) 2001, 2007 Free Software Foundation
+ * Copyright (C) 2001-2013 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
index fdb27718e..7c97d9581 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
+++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c
@@ -1,7 +1,7 @@
/*
* IBM Accurate Mathematical Library
* written by International Business Machines Corp.
- * Copyright (C) 2001, 2006 Free Software Foundation
+ * Copyright (C) 2001-2013 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -59,7 +59,7 @@ __slowpow (double x, double y, double z)
res1 = (double) (ldpp - ldeps);
if (res != res1) /* if result still not accurate enough */
- { /* use mpa for higher persision. */
+ { /* use mpa for higher precision. */
mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
static const mp_no eps = { -3, {1.0, 4.0} };
int p;
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
index 1bd6a67a9..bd0f9f04f 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
+++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrt.c
@@ -1,5 +1,5 @@
/* Double-precision floating point square root wrapper.
- Copyright (C) 2004, 2007, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
index 0e7e6923a..07c4dc156 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
+++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/w_sqrtf.c
@@ -1,5 +1,5 @@
/* Single-precision floating point square root wrapper.
- Copyright (C) 2004, 2007, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
index ce323f1e2..7df52f810 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -1,5 +1,5 @@
/* Optimized strcmp implementation for PowerPC64.
- Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -53,7 +53,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
beq- cr6, L(zeroLength)
dcbt 0,rSTR1
dcbt 0,rSTR2
-/* If less than 8 bytes or not aligned, use the unalligned
+/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
std rWORD8,-8(r1)
@@ -62,7 +62,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
cfi_offset(rWORD7,-16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF containes the low order
+ compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then we are already double word
aligned and can perform the DWaligned loop.
@@ -70,7 +70,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
Otherwise we know the two strings have the same alignment (but not
yet DW). So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
- ellimiate bits preceeding the first byte. Since we want to join the
+ eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This insures that the loop count is
@@ -152,8 +152,8 @@ L(DWaligned):
L(dP1):
mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
- (8-15 byte compare), we want to use only volitile registers. This
- means we can avoid restoring non-volitile registers since we did not
+ (8-15 byte compare), we want to use only volatile registers. This
+ means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
@@ -215,7 +215,7 @@ L(dP2e):
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on a early exit path (16-23 byte compare), we want to
- only use volitile registers and avoid restoring non-volitile
+ only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
@@ -256,7 +256,7 @@ L(dP3e):
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on a early exit path (24-31 byte compare), we want to
- only use volitile registers and avoid restoring non-volitile
+ only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
@@ -340,7 +340,7 @@ L(d04):
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
- shift right double to elliminate bits beyond the compare length. */
+ shift right double to eliminate bits beyond the compare length. */
L(d00):
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
@@ -496,15 +496,15 @@ L(zeroLength):
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF containes the low order
+ compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
- Otherwise we know that rSTR1 is not aready DW aligned yet.
+ Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
- ellimiate bits preceeding the first byte. Since we want to join the
+ eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This insures that the loop count is
@@ -537,7 +537,7 @@ L(unaligned):
clrrdi rSTR2, rSTR2, 3
std r26,-48(r1)
cfi_offset(r26,-48)
-/* Compute the leaft/right shift counts for the unalign rSTR2,
+/* Compute the left/right shift counts for the unalign rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
clrldi rSHL, r27, 61
clrrdi rSTR1, rSTR1, 3
@@ -876,7 +876,7 @@ L(du14):
sldi. rN, rN, 3
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
- shift right double to elliminate bits beyond the compare length.
+ shift right double to eliminate bits beyond the compare length.
This allows the use of double word subtract to compute the final
result.
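
Note: the tail handling described in the memcmp.S comments above ("shift right double to eliminate bits beyond the compare length") can be pictured in C roughly as below. This is only a sketch, assuming aligned big-endian doubleword loads as on the POWER4 targets this file serves; the helper name and types are illustrative, not part of the patch.

#include <stdint.h>

/* Compare the last REM (1..7) bytes of two 8-byte-aligned buffers by
   loading full doublewords and shifting out the bytes past the length.
   On big-endian, the earliest bytes are the most significant, so an
   unsigned compare of the shifted words gives the memcmp sign.  */
static int
tail_compare (const uint64_t *s1, const uint64_t *s2, unsigned rem)
{
  uint64_t w1 = *s1 >> (8 * (8 - rem));
  uint64_t w2 = *s2 >> (8 * (8 - rem));

  return (w1 > w2) - (w1 < w2);
}
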
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
index fbc815d90..734434af0 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -1,5 +1,5 @@
/* Optimized memcpy implementation for PowerPC64.
- Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -28,11 +28,11 @@
with the appropriate combination of byte and halfword load/stores.
There is minimal effort to optimize the alignment of short moves.
The 64-bit implementations of POWER3 and POWER4 do a reasonable job
- of handling unligned load/stores that do not cross 32-byte boundries.
+ of handling unaligned load/stores that do not cross 32-byte boundaries.
Longer moves (>= 32-bytes) justify the effort to get at least the
destination doubleword (8-byte) aligned. Further optimization is
- posible when both source and destination are doubleword aligned.
+ possible when both source and destination are doubleword aligned.
Each case has a optimized unrolled loop. */
.machine power4
@@ -44,9 +44,9 @@ EALIGN (BP_SYM (memcpy), 5, 0)
std 3,-16(1)
std 31,-8(1)
cfi_offset(31,-8)
- andi. 11,3,7 /* check alignement of dst. */
+ andi. 11,3,7 /* check alignment of dst. */
clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
- clrldi 10,4,61 /* check alignement of src. */
+ clrldi 10,4,61 /* check alignment of src. */
cmpldi cr6,5,8
ble- cr1,.L2 /* If move < 32 bytes use short move code. */
cmpld cr6,10,11
@@ -57,7 +57,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
beq .L0
subf 31,0,5
- /* Move 0-7 bytes as needed to get the destination doubleword alligned. */
+ /* Move 0-7 bytes as needed to get the destination doubleword aligned. */
1: bf 31,2f
lbz 6,0(12)
addi 12,12,1
@@ -74,10 +74,10 @@ EALIGN (BP_SYM (memcpy), 5, 0)
stw 6,0(3)
addi 3,3,4
0:
- clrldi 10,12,61 /* check alignement of src again. */
+ clrldi 10,12,61 /* check alignment of src again. */
srdi 9,31,3 /* Number of full double words remaining. */
- /* Copy doublewords from source to destination, assumpting the
+ /* Copy doublewords from source to destination, assuming the
destination is aligned on a doubleword boundary.
At this point we know there are at least 25 bytes left (32-7) to copy.
@@ -154,7 +154,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
add 12,12,0
/* At this point we have a tail of 0-7 bytes and we know that the
- destiniation is double word aligned. */
+ destination is double word aligned. */
4: bf 29,2f
lwz 6,0(12)
addi 12,12,4
@@ -284,7 +284,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
bne cr6,4f
/* Would have liked to use use ld/std here but the 630 processors are
slow for load/store doubles that are not at least word aligned.
- Unaligned Load/Store word execute with only a 1 cycle penaltity. */
+ Unaligned Load/Store word execute with only a 1 cycle penalty. */
lwz 6,0(4)
lwz 7,4(4)
stw 6,0(3)
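
Note: the strategy the memcpy.S comments describe (move 0-7 bytes so the destination becomes doubleword aligned, copy whole doublewords, then finish a short tail) corresponds roughly to the C sketch below. It illustrates the approach only, assuming __builtin_memcpy for the possibly unaligned source accesses; it is not the assembly implementation above.

#include <stddef.h>
#include <stdint.h>

static void *
copy_dst_dw_aligned (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  size_t head = (-(uintptr_t) d) & 7;	/* bytes until dst is 8-byte aligned */

  if (head > n)
    head = n;
  n -= head;
  while (head--)			/* 0-7 byte alignment prologue */
    *d++ = *s++;

  for (; n >= 8; n -= 8, d += 8, s += 8)
    {
      /* Doubleword body; the source may remain unaligned.  */
      uint64_t w;
      __builtin_memcpy (&w, s, 8);
      __builtin_memcpy (d, &w, 8);
    }

  while (n--)				/* 0-7 byte tail */
    *d++ = *s++;
  return dst;
}
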
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
index c86a68a04..198269272 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -1,6 +1,5 @@
/* Optimized memset implementation for PowerPC64.
- Copyright (C) 1997, 1999, 2000, 2002, 2003, 2007
- Free Software Foundation, Inc.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
index a4a6562de..19877fa78 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -1,5 +1,5 @@
/* Optimized strcmp implementation for PowerPC64.
- Copyright (C) 2003, 2006, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -52,7 +52,7 @@ EALIGN (BP_SYM(strncmp), 4, 0)
cmpldi cr1, rN, 0
lis rFEFE, -0x101
bne L(unaligned)
-/* We are doubleword alligned so set up for two loops. first a double word
+/* We are doubleword aligned so set up for two loops. first a double word
loop, then fall into the byte loop if any residual. */
srdi. rTMP, rN, 3
clrldi rN, rN, 61