diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-10-18 21:33:25 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-10-18 21:33:25 +0000 |
commit | fe2ed5aaa408e1ab996a9fe1595a05634208a79c (patch) | |
tree | e1027fbc9d8a4a8c33f8149b2b42e8cde89c74f6 /libc/sysdeps/powerpc/powerpc32/strchr.S | |
parent | 571c782b982d888565e7d06bfc2f3d47582fe829 (diff) |
Merge changes between r23946 and r24305 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@24306 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/powerpc/powerpc32/strchr.S')
-rw-r--r-- | libc/sysdeps/powerpc/powerpc32/strchr.S | 71 |
1 file changed, 51 insertions(+), 20 deletions(-)
diff --git a/libc/sysdeps/powerpc/powerpc32/strchr.S b/libc/sysdeps/powerpc/powerpc32/strchr.S index c9952eecc..605056577 100644 --- a/libc/sysdeps/powerpc/powerpc32/strchr.S +++ b/libc/sysdeps/powerpc/powerpc32/strchr.S @@ -36,6 +36,8 @@ ENTRY (strchr) #define rIGN r10 /* number of bits we should ignore in the first word */ #define rMASK r11 /* mask with the bits to ignore set to 0 */ #define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK rlwimi rCHR, rCHR, 8, 16, 23 @@ -49,64 +51,93 @@ ENTRY (strchr) addi r7F7F, r7F7F, 0x7f7f /* Test the first (partial?) word. */ lwz rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + slw rMASK, rMASK, rIGN +#else srw rMASK, rMASK, rIGN +#endif orc rWORD, rWORD, rMASK add rTMP1, rFEFE, rWORD nor rTMP2, r7F7F, rWORD - and. rTMP1, rTMP1, rTMP2 + and. rTMP4, rTMP1, rTMP2 xor rTMP3, rCHR, rWORD orc rTMP3, rTMP3, rMASK b L(loopentry) /* The loop. */ -L(loop):lwzu rWORD, 4(rSTR) - and. rTMP1, rTMP1, rTMP2 +L(loop): + lwzu rWORD, 4(rSTR) + and. rTMP5, rTMP1, rTMP2 /* Test for 0. */ - add rTMP1, rFEFE, rWORD - nor rTMP2, r7F7F, rWORD + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ bne L(foundit) - and. rTMP1, rTMP1, rTMP2 + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ /* Start test for the bytes we're looking for. */ xor rTMP3, rCHR, rWORD L(loopentry): add rTMP1, rFEFE, rTMP3 nor rTMP2, r7F7F, rTMP3 beq L(loop) + /* There is a zero byte in the word, but may also be a matching byte (either before or after the zero byte). In fact, we may be looking for a - zero byte, in which case we return a match. We guess that this hasn't - happened, though. */ -L(missed): - and. rTMP1, rTMP1, rTMP2 + zero byte, in which case we return a match. */ + and. rTMP5, rTMP1, rTMP2 li rRTN, 0 beqlr -/* It did happen. Decide which one was first... 
- I'm not sure if this is actually faster than a sequence of - rotates, compares, and branches (we use it anyway because it's shorter). */ +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmplw rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 32-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). */ and rFEFE, r7F7F, rWORD - or rMASK, r7F7F, rWORD + or rTMP5, r7F7F, rWORD and rTMP1, r7F7F, rTMP3 - or rIGN, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 add rFEFE, rFEFE, r7F7F add rTMP1, rTMP1, r7F7F - nor rWORD, rMASK, rFEFE - nor rTMP2, rIGN, rTMP1 + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 cmplw rWORD, rTMP2 bgtlr - cntlzw rCLZB, rTMP2 +#endif srwi rCLZB, rCLZB, 3 add rRTN, rSTR, rCLZB blr L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + subfic rCLZB, rCLZB, 32-7-32 + srawi rCLZB, rCLZB, 3 +#else and rTMP1, r7F7F, rTMP3 - or rIGN, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 add rTMP1, rTMP1, r7F7F - nor rTMP2, rIGN, rTMP1 + nor rTMP2, rTMP4, rTMP1 cntlzw rCLZB, rTMP2 subi rSTR, rSTR, 4 srwi rCLZB, rCLZB, 3 +#endif add rRTN, rSTR, rCLZB blr END (strchr) |