diff options
author | Yongqin Liu <yongqin.liu@linaro.org> | 2016-01-06 19:25:56 +0800 |
---|---|---|
committer | Yongqin Liu <yongqin.liu@linaro.org> | 2016-01-06 19:25:56 +0800 |
commit | 54ff4b4be83521c8cb989d4bf7253a04abaa376e (patch) | |
tree | c09435b37fa7182d09d6ace6f407c32757343c6d | |
parent | c5f478ced67e06e5f43a2b948a0061eed7cb327f (diff) |
clean for static-binary
Signed-off-by: Yongqin Liu <yongqin.liu@linaro.org>
-rw-r--r-- | android-tools/static-binary/src/main.c | 108 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_base.S | 321 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_bionic.S | 127 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_impl.S | 627 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_impl_glibc217.S | 296 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_neon.S | 3 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_none.S | 2 | ||||
-rw-r--r-- | android-tools/static-binary/src/memcpy_vfp.S | 3 |
8 files changed, 0 insertions, 1487 deletions
diff --git a/android-tools/static-binary/src/main.c b/android-tools/static-binary/src/main.c deleted file mode 100644 index 7bc2617..0000000 --- a/android-tools/static-binary/src/main.c +++ /dev/null @@ -1,108 +0,0 @@ -#include <stdio.h> -#include <stdint.h> -#include <string.h> -#include <stdlib.h> -#include <sys/time.h> - -void *ad_memcpy_neon(void *dest, const void *src, size_t n); -void *ad_memcpy_vfp(void *dest, const void *src, size_t n); -void *ad_memcpy_none(void *dest, const void *src, size_t n); -void *ad_memcpy_glibc217(void *dest, const void *src, size_t n); -void *ad_memcpy_bionic(void *dest, const void *src, size_t n); - -#define N_ELEMENTS(x) (sizeof(x) / sizeof((x)[0])) - -struct bench { - unsigned int len; - unsigned long long loops; -}; - -int -main (int argc, char *argv[]) -{ - static const struct bench benches[] = { - { .len = 8, .loops = 16000000LLU }, - { .len = 81, .loops = 8000000LLU }, - { .len = 8192, .loops = 200000LLU }, - { .len = 131072, .loops = 5000LLU }, - { .len = 1048576 * 10, .loops = 10LLU }, - }; - - for (int iter = 0; iter < N_ELEMENTS (benches); ++iter) { - const struct bench *bench = &benches[iter]; - unsigned int len = bench->len; - unsigned long long loops; - - unsigned char *src; - unsigned char *dst; - struct timeval start, end; - double mbps; - - printf ("benchmarking: len: %8u loops: %llu\n", len, bench->loops); - - src = malloc (len); - dst = malloc (len); - memset (src, 0xaa, len); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - ad_memcpy_neon (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (neon) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - ad_memcpy_vfp (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (vfp) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - ad_memcpy_none (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (none) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - ad_memcpy_glibc217 (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (old arm generic) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - ad_memcpy_bionic (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (bionic) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - gettimeofday (&start, NULL); - loops = bench->loops + 1; - while (--loops) - memcpy (dst, src, len); - gettimeofday (&end, NULL); - timersub (&end, &start, &end); - mbps = (bench->loops * bench->len * 1000000.0d) / (double)(end.tv_sec * 1000000.0d + end.tv_usec) / 1024 / 1024; - printf (" memcpy (curr toolchain - dynamic) took %2ju.%.6ju s ~ %'.1f MiB/s\n", (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps); - - free (dst); - free (src); - - puts (""); - } - - return 0; -} diff --git a/android-tools/static-binary/src/memcpy_base.S b/android-tools/static-binary/src/memcpy_base.S deleted file mode 100644 index 721e9bc..0000000 --- a/android-tools/static-binary/src/memcpy_base.S +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Define an entry point visible from C. */ -#define C_LABEL(name) name: - -ENTRY_PRIVATE(MEMCPY_BASE) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - // Assumes that n >= 0, and dst, src are valid pointers. - // For any sizes less than 832 use the neon code that doesn't - // care about the src alignment. This avoids any checks - // for src alignment, and offers the best improvement since - // smaller sized copies are dominated by the overhead of - // the pre and post main loop. - // For larger copies, if src and dst cannot both be aligned to - // word boundaries, use the neon code. - // For all other copies, align dst to a double word boundary - // and copy using LDRD/STRD instructions. - - cmp r2, #16 - blo .L_copy_less_than_16_unknown_align - - // TODO: The aligned copy code is extremely slow copying some large - // buffers so always go through the unaligned path for now. - //cmp r2, #832 - //bge .L_check_alignment - -.L_copy_unknown_alignment: - // Unknown alignment of src and dst. - // Assumes that the first few bytes have already been prefetched. - - // Align destination to 128 bits. The mainloop store instructions - // require this alignment or they will throw an exception. - rsb r3, r0, #0 - ands r3, r3, #0xF - beq 2f - - // Copy up to 15 bytes (count in r3). - sub r2, r2, r3 - movs ip, r3, lsl #31 - - itt mi - ldrmib lr, [r1], #1 - strmib lr, [r0], #1 - itttt cs - ldrcsb ip, [r1], #1 - ldrcsb lr, [r1], #1 - strcsb ip, [r0], #1 - strcsb lr, [r0], #1 - - movs ip, r3, lsl #29 - bge 1f - // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after. - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! -1: bcc 2f - // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after. - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0, :64]! - -2: // Make sure we have at least 64 bytes to copy. - subs r2, r2, #64 - blo 2f - -1: // The main loop copies 64 bytes at a time. - vld1.8 {d0 - d3}, [r1]! - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(64*4)] - subs r2, r2, #64 - vst1.8 {d0 - d3}, [r0, :128]! - vst1.8 {d4 - d7}, [r0, :128]! - bhs 1b - -2: // Fix-up the remaining count and make sure we have >= 32 bytes left. - adds r2, r2, #32 - blo 3f - - // 32 bytes. These cache lines were already preloaded. - vld1.8 {d0 - d3}, [r1]! - sub r2, r2, #32 - vst1.8 {d0 - d3}, [r0, :128]! -3: // Less than 32 left. - add r2, r2, #32 - tst r2, #0x10 - beq .L_copy_less_than_16_unknown_align - // Copies 16 bytes, destination 128 bits aligned. - vld1.8 {d0, d1}, [r1]! - vst1.8 {d0, d1}, [r0, :128]! - -.L_copy_less_than_16_unknown_align: - // Copy up to 15 bytes (count in r2). - movs ip, r2, lsl #29 - bcc 1f - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0]! -1: bge 2f - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! - -2: // Copy 0 to 4 bytes. - lsls r2, r2, #31 - itt ne - ldrneb lr, [r1], #1 - strneb lr, [r0], #1 - itttt cs - ldrcsb ip, [r1], #1 - ldrcsb lr, [r1] - strcsb ip, [r0], #1 - strcsb lr, [r0] - - pop {r0, pc} - -.L_check_alignment: - // If src and dst cannot both be aligned to a word boundary, - // use the unaligned copy version. - eor r3, r0, r1 - ands r3, r3, #0x3 - bne .L_copy_unknown_alignment -END(MEMCPY_BASE) - -ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 - - // To try and improve performance, stack layout changed, - // i.e., not keeping the stack looking like users expect - // (highest numbered register at highest address). - strd r4, r5, [sp, #-8]! - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - strd r6, r7, [sp, #-8]! - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r6, 0 - .cfi_rel_offset r7, 0 - strd r8, r9, [sp, #-8]! - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset r8, 0 - .cfi_rel_offset r9, 4 - - // Optimized for already aligned dst code. - ands ip, r0, #3 - bne .L_dst_not_word_aligned - -.L_word_aligned: - // Align the destination buffer to 8 bytes, to make sure double - // loads and stores don't cross a cache line boundary, - // as they are then more expensive even if the data is in the cache - // (require two load/store issue cycles instead of one). - // If only one of the buffers is not 8 bytes aligned, - // then it's more important to align dst than src, - // because there is more penalty for stores - // than loads that cross a cacheline boundary. - // This check and realignment are only done if there is >= 832 - // bytes to copy. - - // Dst is word aligned, but check if it is already double word aligned. - ands r3, r0, #4 - beq 1f - ldr r3, [r1], #4 - str r3, [r0], #4 - sub r2, #4 - -1: // Can only get here if > 64 bytes to copy, so don't do check r2. - sub r2, #64 - -2: // Every loop iteration copies 64 bytes. - .irp offset, #0, #8, #16, #24, #32 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - - ldrd r4, r5, [r1, #40] - ldrd r6, r7, [r1, #48] - ldrd r8, r9, [r1, #56] - - // Keep the pld as far from the next load as possible. - // The amount to prefetch was determined experimentally using - // large sizes, and verifying the prefetch size does not affect - // the smaller copies too much. - // WARNING: If the ldrd and strd instructions get too far away - // from each other, performance suffers. Three loads - // in a row is the best tradeoff. - pld [r1, #(64*16)] - strd r4, r5, [r0, #40] - strd r6, r7, [r0, #48] - strd r8, r9, [r0, #56] - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 2b - - // Fix-up the remaining count and make sure we have >= 32 bytes left. - adds r2, r2, #32 - blo 4f - - // Copy 32 bytes. These cache lines were already preloaded. - .irp offset, #0, #8, #16, #24 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - add r1, r1, #32 - add r0, r0, #32 - sub r2, r2, #32 -4: // Less than 32 left. - add r2, r2, #32 - tst r2, #0x10 - beq 5f - // Copy 16 bytes. - .irp offset, #0, #8 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - add r1, r1, #16 - add r0, r0, #16 - -5: // Copy up to 15 bytes (count in r2). - movs ip, r2, lsl #29 - bcc 1f - // Copy 8 bytes. - ldrd r4, r5, [r1], #8 - strd r4, r5, [r0], #8 -1: bge 2f - // Copy 4 bytes. - ldr r4, [r1], #4 - str r4, [r0], #4 -2: // Copy 0 to 4 bytes. - lsls r2, r2, #31 - itt ne - ldrneb lr, [r1], #1 - strneb lr, [r0], #1 - itttt cs - ldrcsb ip, [r1], #1 - ldrcsb lr, [r1] - strcsb ip, [r0], #1 - strcsb lr, [r0] - - // Restore registers: optimized pop {r0, pc} - ldrd r8, r9, [sp], #8 - ldrd r6, r7, [sp], #8 - ldrd r4, r5, [sp], #8 - pop {r0, pc} - -.L_dst_not_word_aligned: - // Align dst to word. - rsb ip, ip, #4 - cmp ip, #2 - - itt gt - ldrgtb lr, [r1], #1 - strgtb lr, [r0], #1 - - itt ge - ldrgeb lr, [r1], #1 - strgeb lr, [r0], #1 - - ldrb lr, [r1], #1 - strb lr, [r0], #1 - - sub r2, r2, ip - - // Src is guaranteed to be at least word aligned by this point. - b .L_word_aligned -END(MEMCPY_BASE_ALIGNED) diff --git a/android-tools/static-binary/src/memcpy_bionic.S b/android-tools/static-binary/src/memcpy_bionic.S deleted file mode 100644 index 6b3537c..0000000 --- a/android-tools/static-binary/src/memcpy_bionic.S +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Prototype: void *memcpy (void *dst, const void *src, size_t count). - -#define memcpy ad_memcpy_bionic -// TODO: Try below? -//#define __memcpy_base ad_memcpy_bionic -//#define __memcpy_base_aligned ad_memcpy_bionic - -/* Define an entry point visible from C. */ -#define C_LABEL(name) name: - -#define ENTRY(name) \ - .global name; \ - .type name, %function; \ - .align 4; \ - C_LABEL(name) \ - .cfi_sections .debug_frame; \ - .cfi_startproc; - -#undef END -#define END(name) \ - .cfi_endproc; - -/* Like ENTRY, but with hidden visibility. */ -#define ENTRY_PRIVATE(f) \ - ENTRY(f); \ - .hidden f - - .text - .syntax unified - .fpu neon - -/* -ENTRY(__memcpy_chk) - cmp r2, r3 - bhi __memcpy_chk_fail - - // Fall through to memcpy... -END(__memcpy_chk) -*/ - -ENTRY(memcpy) - pld [r1, #64] - push {r0, lr} - .cfi_def_cfa_offset 8 - .cfi_rel_offset r0, 0 - .cfi_rel_offset lr, 4 -END(memcpy) - -#define MEMCPY_BASE __memcpy_base -#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned -#include "memcpy_base.S" - -ENTRY_PRIVATE(__memcpy_chk_fail) - // Preserve lr for backtrace. - push {lr} - .cfi_def_cfa_offset 4 - .cfi_rel_offset lr, 0 - - ldr r0, error_message - ldr r1, error_code -1: - add r0, pc - //bl __fortify_chk_fail -error_code: - .word 80100 //BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW -error_message: - .word error_string-(1b+8) -END(__memcpy_chk_fail) - - .data -error_string: - .string "memcpy: prevented write past end of buffer" diff --git a/android-tools/static-binary/src/memcpy_impl.S b/android-tools/static-binary/src/memcpy_impl.S deleted file mode 100644 index f6e06f4..0000000 --- a/android-tools/static-binary/src/memcpy_impl.S +++ /dev/null @@ -1,627 +0,0 @@ - /* Copyright (c) 2013, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - */ - - /* - This memcpy routine is optimised for Cortex-A15 cores and takes advantage - of VFP or NEON when built with the appropriate flags. - - Assumptions: - - ARMv6 (ARMv7-a if using Neon) - ARM state - Unaligned accesses - - */ - -/* Define an entry point visible from C. */ -#define C_LABEL(name) name: - -#define ENTRY(name) \ - .global name; \ - .type name, %function; \ - .align 4; \ - C_LABEL(name) \ - .cfi_sections .debug_frame; \ - .cfi_startproc; - -#undef END -#define END(name) \ - .cfi_endproc; - - - .syntax unified - /* This implementation requires ARM state. */ - .arm - -#ifdef MEMCPY_NEON - - .fpu neon - .arch armv7-a - # define FRAME_SIZE 4 - # define USE_VFP - # define USE_NEON - -#elif defined (MEMCPY_VFP) - - .arch armv6 - .fpu vfpv2 - # define FRAME_SIZE 32 - # define USE_VFP - -#else - .arch armv6 - # define FRAME_SIZE 32 - -#endif - - /* Old versions of GAS incorrectly implement the NEON align semantics. */ - #ifdef BROKEN_ASM_NEON_ALIGN - #define ALIGN(addr, align) addr,:align - #else - #define ALIGN(addr, align) addr:align - #endif - - #define PC_OFFSET 8 /* PC pipeline compensation. */ - #define INSN_SIZE 4 - - /* Call parameters. */ - #define dstin r0 - #define src r1 - #define count r2 - - /* Locals. */ - #define tmp1 r3 - #define dst ip - #define tmp2 r10 - - #ifndef USE_NEON - /* For bulk copies using GP registers. */ - #define A_l r2 /* Call-clobbered. */ - #define A_h r3 /* Call-clobbered. */ - #define B_l r4 - #define B_h r5 - #define C_l r6 - #define C_h r7 - #define D_l r8 - #define D_h r9 - #endif - - /* Number of lines ahead to pre-fetch data. If you change this the code - below will need adjustment to compensate. */ - - #define prefetch_lines 5 - - #ifdef USE_VFP - .macro cpy_line_vfp vreg, base - vstr \vreg, [dst, #\base] - vldr \vreg, [src, #\base] - vstr d0, [dst, #\base + 8] - vldr d0, [src, #\base + 8] - vstr d1, [dst, #\base + 16] - vldr d1, [src, #\base + 16] - vstr d2, [dst, #\base + 24] - vldr d2, [src, #\base + 24] - vstr \vreg, [dst, #\base + 32] - vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] - vstr d0, [dst, #\base + 40] - vldr d0, [src, #\base + 40] - vstr d1, [dst, #\base + 48] - vldr d1, [src, #\base + 48] - vstr d2, [dst, #\base + 56] - vldr d2, [src, #\base + 56] - .endm - - .macro cpy_tail_vfp vreg, base - vstr \vreg, [dst, #\base] - vldr \vreg, [src, #\base] - vstr d0, [dst, #\base + 8] - vldr d0, [src, #\base + 8] - vstr d1, [dst, #\base + 16] - vldr d1, [src, #\base + 16] - vstr d2, [dst, #\base + 24] - vldr d2, [src, #\base + 24] - vstr \vreg, [dst, #\base + 32] - vstr d0, [dst, #\base + 40] - vldr d0, [src, #\base + 40] - vstr d1, [dst, #\base + 48] - vldr d1, [src, #\base + 48] - vstr d2, [dst, #\base + 56] - vldr d2, [src, #\base + 56] - .endm - #endif - - .p2align 6 -ENTRY(memcpy) - - mov dst, dstin /* Preserve dstin, we need to return it. */ - cmp count, #64 - bge .Lcpy_not_short - /* Deal with small copies quickly by dropping straight into the - exit block. */ - - .Ltail63unaligned: - #ifdef USE_NEON - and tmp1, count, #0x38 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - vld1.8 {d0}, [src]! /* 14 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 12 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 10 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 8 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 6 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 4 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 2 words to go. */ - vst1.8 {d0}, [dst]! - - tst count, #4 - ldrne tmp1, [src], #4 - strne tmp1, [dst], #4 - #else - /* Copy up to 15 full words of data. May not be aligned. */ - /* Cannot use VFP for unaligned data. */ - and tmp1, count, #0x3c - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2) - /* Jump directly into the sequence below at the correct offset. */ - add pc, pc, tmp1, lsl #1 - - ldr tmp1, [src, #-60] /* 15 words to go. */ - str tmp1, [dst, #-60] - - ldr tmp1, [src, #-56] /* 14 words to go. */ - str tmp1, [dst, #-56] - ldr tmp1, [src, #-52] - str tmp1, [dst, #-52] - - ldr tmp1, [src, #-48] /* 12 words to go. */ - str tmp1, [dst, #-48] - ldr tmp1, [src, #-44] - str tmp1, [dst, #-44] - - ldr tmp1, [src, #-40] /* 10 words to go. */ - str tmp1, [dst, #-40] - ldr tmp1, [src, #-36] - str tmp1, [dst, #-36] - - ldr tmp1, [src, #-32] /* 8 words to go. */ - str tmp1, [dst, #-32] - ldr tmp1, [src, #-28] - str tmp1, [dst, #-28] - - ldr tmp1, [src, #-24] /* 6 words to go. */ - str tmp1, [dst, #-24] - ldr tmp1, [src, #-20] - str tmp1, [dst, #-20] - - ldr tmp1, [src, #-16] /* 4 words to go. */ - str tmp1, [dst, #-16] - ldr tmp1, [src, #-12] - str tmp1, [dst, #-12] - - ldr tmp1, [src, #-8] /* 2 words to go. */ - str tmp1, [dst, #-8] - ldr tmp1, [src, #-4] - str tmp1, [dst, #-4] - #endif - - lsls count, count, #31 - ldrhcs tmp1, [src], #2 - ldrbne src, [src] /* Src is dead, use as a scratch. */ - strhcs tmp1, [dst], #2 - strbne src, [dst] - bx lr - - .Lcpy_not_short: - /* At least 64 bytes to copy, but don't know the alignment yet. */ - str tmp2, [sp, #-FRAME_SIZE]! - and tmp2, src, #7 - and tmp1, dst, #7 - cmp tmp1, tmp2 - bne .Lcpy_notaligned - - #ifdef USE_VFP - /* Magic dust alert! Force VFP on Cortex-A9. Experiments show - that the FP pipeline is much better at streaming loads and - stores. This is outside the critical loop. */ - vmov.f32 s0, s0 - #endif - - /* SRC and DST have the same mutual 64-bit alignment, but we may - still need to pre-copy some bytes to get to natural alignment. - We bring SRC and DST into full 64-bit alignment. */ - lsls tmp2, dst, #29 - beq 1f - rsbs tmp2, tmp2, #0 - sub count, count, tmp2, lsr #29 - ldrmi tmp1, [src], #4 - strmi tmp1, [dst], #4 - lsls tmp2, tmp2, #2 - ldrhcs tmp1, [src], #2 - ldrbne tmp2, [src], #1 - strhcs tmp1, [dst], #2 - strbne tmp2, [dst], #1 - - 1: - subs tmp2, count, #64 /* Use tmp2 for count. */ - blt .Ltail63aligned - - cmp tmp2, #512 - bge .Lcpy_body_long - - .Lcpy_body_medium: /* Count in tmp2. */ - #ifdef USE_VFP - 1: - vldr d0, [src, #0] - subs tmp2, tmp2, #64 - vldr d1, [src, #8] - vstr d0, [dst, #0] - vldr d0, [src, #16] - vstr d1, [dst, #8] - vldr d1, [src, #24] - vstr d0, [dst, #16] - vldr d0, [src, #32] - vstr d1, [dst, #24] - vldr d1, [src, #40] - vstr d0, [dst, #32] - vldr d0, [src, #48] - vstr d1, [dst, #40] - vldr d1, [src, #56] - vstr d0, [dst, #48] - add src, src, #64 - vstr d1, [dst, #56] - add dst, dst, #64 - bge 1b - tst tmp2, #0x3f - beq .Ldone - - .Ltail63aligned: /* Count in tmp2. */ - and tmp1, tmp2, #0x38 - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - - vldr d0, [src, #-56] /* 14 words to go. */ - vstr d0, [dst, #-56] - vldr d0, [src, #-48] /* 12 words to go. */ - vstr d0, [dst, #-48] - vldr d0, [src, #-40] /* 10 words to go. */ - vstr d0, [dst, #-40] - vldr d0, [src, #-32] /* 8 words to go. */ - vstr d0, [dst, #-32] - vldr d0, [src, #-24] /* 6 words to go. */ - vstr d0, [dst, #-24] - vldr d0, [src, #-16] /* 4 words to go. */ - vstr d0, [dst, #-16] - vldr d0, [src, #-8] /* 2 words to go. */ - vstr d0, [dst, #-8] - #else - sub src, src, #8 - sub dst, dst, #8 - 1: - ldrd A_l, A_h, [src, #8] - strd A_l, A_h, [dst, #8] - ldrd A_l, A_h, [src, #16] - strd A_l, A_h, [dst, #16] - ldrd A_l, A_h, [src, #24] - strd A_l, A_h, [dst, #24] - ldrd A_l, A_h, [src, #32] - strd A_l, A_h, [dst, #32] - ldrd A_l, A_h, [src, #40] - strd A_l, A_h, [dst, #40] - ldrd A_l, A_h, [src, #48] - strd A_l, A_h, [dst, #48] - ldrd A_l, A_h, [src, #56] - strd A_l, A_h, [dst, #56] - ldrd A_l, A_h, [src, #64]! - strd A_l, A_h, [dst, #64]! - subs tmp2, tmp2, #64 - bge 1b - tst tmp2, #0x3f - bne 1f - ldr tmp2,[sp], #FRAME_SIZE - bx lr - 1: - add src, src, #8 - add dst, dst, #8 - - .Ltail63aligned: /* Count in tmp2. */ - /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but - we know that the src and dest are 64-bit aligned so we can use - LDRD/STRD to improve efficiency. */ - /* TMP2 is now negative, but we don't care about that. The bottom - six bits still tell us how many bytes are left to copy. */ - - and tmp1, tmp2, #0x38 - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - ldrd A_l, A_h, [src, #-56] /* 14 words to go. */ - strd A_l, A_h, [dst, #-56] - ldrd A_l, A_h, [src, #-48] /* 12 words to go. */ - strd A_l, A_h, [dst, #-48] - ldrd A_l, A_h, [src, #-40] /* 10 words to go. */ - strd A_l, A_h, [dst, #-40] - ldrd A_l, A_h, [src, #-32] /* 8 words to go. */ - strd A_l, A_h, [dst, #-32] - ldrd A_l, A_h, [src, #-24] /* 6 words to go. */ - strd A_l, A_h, [dst, #-24] - ldrd A_l, A_h, [src, #-16] /* 4 words to go. */ - strd A_l, A_h, [dst, #-16] - ldrd A_l, A_h, [src, #-8] /* 2 words to go. */ - strd A_l, A_h, [dst, #-8] - - #endif - tst tmp2, #4 - ldrne tmp1, [src], #4 - strne tmp1, [dst], #4 - lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ - ldrhcs tmp1, [src], #2 - ldrbne tmp2, [src] - strhcs tmp1, [dst], #2 - strbne tmp2, [dst] - - .Ldone: - ldr tmp2, [sp], #FRAME_SIZE - bx lr - - .Lcpy_body_long: /* Count in tmp2. */ - - /* Long copy. We know that there's at least (prefetch_lines * 64) - bytes to go. */ - #ifdef USE_VFP - /* Don't use PLD. Instead, read some data in advance of the current - copy position into a register. This should act like a PLD - operation but we won't have to repeat the transfer. */ - - vldr d3, [src, #0] - vldr d4, [src, #64] - vldr d5, [src, #128] - vldr d6, [src, #192] - vldr d7, [src, #256] - - vldr d0, [src, #8] - vldr d1, [src, #16] - vldr d2, [src, #24] - add src, src, #32 - - subs tmp2, tmp2, #prefetch_lines * 64 * 2 - blt 2f - 1: - cpy_line_vfp d3, 0 - cpy_line_vfp d4, 64 - cpy_line_vfp d5, 128 - add dst, dst, #3 * 64 - add src, src, #3 * 64 - cpy_line_vfp d6, 0 - cpy_line_vfp d7, 64 - add dst, dst, #2 * 64 - add src, src, #2 * 64 - subs tmp2, tmp2, #prefetch_lines * 64 - bge 1b - - 2: - cpy_tail_vfp d3, 0 - cpy_tail_vfp d4, 64 - cpy_tail_vfp d5, 128 - add src, src, #3 * 64 - add dst, dst, #3 * 64 - cpy_tail_vfp d6, 0 - vstr d7, [dst, #64] - vldr d7, [src, #64] - vstr d0, [dst, #64 + 8] - vldr d0, [src, #64 + 8] - vstr d1, [dst, #64 + 16] - vldr d1, [src, #64 + 16] - vstr d2, [dst, #64 + 24] - vldr d2, [src, #64 + 24] - vstr d7, [dst, #64 + 32] - add src, src, #96 - vstr d0, [dst, #64 + 40] - vstr d1, [dst, #64 + 48] - vstr d2, [dst, #64 + 56] - add dst, dst, #128 - add tmp2, tmp2, #prefetch_lines * 64 - b .Lcpy_body_medium - #else - /* Long copy. Use an SMS style loop to maximize the I/O - bandwidth of the core. We don't have enough spare registers - to synthesise prefetching, so use PLD operations. */ - /* Pre-bias src and dst. */ - sub src, src, #8 - sub dst, dst, #8 - pld [src, #8] - pld [src, #72] - subs tmp2, tmp2, #64 - pld [src, #136] - ldrd A_l, A_h, [src, #8] - strd B_l, B_h, [sp, #8] - ldrd B_l, B_h, [src, #16] - strd C_l, C_h, [sp, #16] - ldrd C_l, C_h, [src, #24] - strd D_l, D_h, [sp, #24] - pld [src, #200] - ldrd D_l, D_h, [src, #32]! - b 1f - .p2align 6 - 2: - pld [src, #232] - strd A_l, A_h, [dst, #40] - ldrd A_l, A_h, [src, #40] - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [src, #48] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [src, #56] - strd D_l, D_h, [dst, #64]! - ldrd D_l, D_h, [src, #64]! - subs tmp2, tmp2, #64 - 1: - strd A_l, A_h, [dst, #8] - ldrd A_l, A_h, [src, #8] - strd B_l, B_h, [dst, #16] - ldrd B_l, B_h, [src, #16] - strd C_l, C_h, [dst, #24] - ldrd C_l, C_h, [src, #24] - strd D_l, D_h, [dst, #32] - ldrd D_l, D_h, [src, #32] - bcs 2b - /* Save the remaining bytes and restore the callee-saved regs. */ - strd A_l, A_h, [dst, #40] - add src, src, #40 - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [sp, #8] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [sp, #16] - strd D_l, D_h, [dst, #64] - ldrd D_l, D_h, [sp, #24] - add dst, dst, #72 - tst tmp2, #0x3f - bne .Ltail63aligned - ldr tmp2, [sp], #FRAME_SIZE - bx lr - #endif - - .Lcpy_notaligned: - pld [src] - pld [src, #64] - /* There's at least 64 bytes to copy, but there is no mutual - alignment. */ - /* Bring DST to 64-bit alignment. */ - lsls tmp2, dst, #29 - pld [src, #(2 * 64)] - beq 1f - rsbs tmp2, tmp2, #0 - sub count, count, tmp2, lsr #29 - ldrmi tmp1, [src], #4 - strmi tmp1, [dst], #4 - lsls tmp2, tmp2, #2 - ldrbne tmp1, [src], #1 - ldrhcs tmp2, [src], #2 - strbne tmp1, [dst], #1 - strhcs tmp2, [dst], #2 - 1: - pld [src, #(3 * 64)] - subs count, count, #64 - ldrmi tmp2, [sp], #FRAME_SIZE - bmi .Ltail63unaligned - pld [src, #(4 * 64)] - - #ifdef USE_NEON - vld1.8 {d0-d3}, [src]! - vld1.8 {d4-d7}, [src]! - subs count, count, #64 - bmi 2f - 1: - pld [src, #(4 * 64)] - vst1.8 {d0-d3}, [ALIGN (dst, 64)]! - vld1.8 {d0-d3}, [src]! - vst1.8 {d4-d7}, [ALIGN (dst, 64)]! - vld1.8 {d4-d7}, [src]! - subs count, count, #64 - bpl 1b - 2: - vst1.8 {d0-d3}, [ALIGN (dst, 64)]! - vst1.8 {d4-d7}, [ALIGN (dst, 64)]! - ands count, count, #0x3f - #else - /* Use an SMS style loop to maximize the I/O bandwidth. */ - sub src, src, #4 - sub dst, dst, #8 - subs tmp2, count, #64 /* Use tmp2 for count. */ - ldr A_l, [src, #4] - ldr A_h, [src, #8] - strd B_l, B_h, [sp, #8] - ldr B_l, [src, #12] - ldr B_h, [src, #16] - strd C_l, C_h, [sp, #16] - ldr C_l, [src, #20] - ldr C_h, [src, #24] - strd D_l, D_h, [sp, #24] - ldr D_l, [src, #28] - ldr D_h, [src, #32]! - b 1f - .p2align 6 - 2: - pld [src, #(5 * 64) - (32 - 4)] - strd A_l, A_h, [dst, #40] - ldr A_l, [src, #36] - ldr A_h, [src, #40] - strd B_l, B_h, [dst, #48] - ldr B_l, [src, #44] - ldr B_h, [src, #48] - strd C_l, C_h, [dst, #56] - ldr C_l, [src, #52] - ldr C_h, [src, #56] - strd D_l, D_h, [dst, #64]! - ldr D_l, [src, #60] - ldr D_h, [src, #64]! - subs tmp2, tmp2, #64 - 1: - strd A_l, A_h, [dst, #8] - ldr A_l, [src, #4] - ldr A_h, [src, #8] - strd B_l, B_h, [dst, #16] - ldr B_l, [src, #12] - ldr B_h, [src, #16] - strd C_l, C_h, [dst, #24] - ldr C_l, [src, #20] - ldr C_h, [src, #24] - strd D_l, D_h, [dst, #32] - ldr D_l, [src, #28] - ldr D_h, [src, #32] - bcs 2b - - /* Save the remaining bytes and restore the callee-saved regs. */ - strd A_l, A_h, [dst, #40] - add src, src, #36 - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [sp, #8] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [sp, #16] - strd D_l, D_h, [dst, #64] - ldrd D_l, D_h, [sp, #24] - add dst, dst, #72 - ands count, tmp2, #0x3f - #endif - ldr tmp2, [sp], #FRAME_SIZE - bne .Ltail63unaligned - bx lr - - END(memcpy) diff --git a/android-tools/static-binary/src/memcpy_impl_glibc217.S b/android-tools/static-binary/src/memcpy_impl_glibc217.S deleted file mode 100644 index 5bb8fad..0000000 --- a/android-tools/static-binary/src/memcpy_impl_glibc217.S +++ /dev/null @@ -1,296 +0,0 @@ -/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - Contributed by MontaVista Software, Inc. (written by Nicolas Pitre) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <http://www.gnu.org/licenses/>. */ - -#define memcpy ad_memcpy_glibc217 - -# define cfi_startproc .cfi_startproc -# define cfi_endproc .cfi_endproc -# define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off -# define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg -# define cfi_def_cfa_offset(off) .cfi_def_cfa_offset off -# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off -# define cfi_offset(reg, off) .cfi_offset reg, off -# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off -# define cfi_register(r1, r2) .cfi_register r1, r2 -# define cfi_return_column(reg) .cfi_return_column reg -# define cfi_restore(reg) .cfi_restore reg -# define cfi_same_value(reg) .cfi_same_value reg -# define cfi_undefined(reg) .cfi_undefined reg -# define cfi_remember_state .cfi_remember_state -# define cfi_restore_state .cfi_restore_state -# define cfi_window_save .cfi_window_save -# define cfi_personality(enc, exp) .cfi_personality enc, exp -# define cfi_lsda(enc, exp) .cfi_lsda enc, exp - -/* Define an entry point visible from C. */ -#define C_LABEL(name) name: - -#define ENTRY(name) \ - .global name; \ - .type name, %function; \ - .align 4; \ - C_LABEL(name) \ - .cfi_sections .debug_frame; \ - cfi_startproc; - -#undef END -#define END(name) \ - cfi_endproc; - -/* - * Data preload for architectures that support it (ARM V5TE and above) - */ -#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ - && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ - && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ - && !defined (__ARM_ARCH_5T__)) -#define PLD(code...) code -#else -#define PLD(code...) -#endif - -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - -/* - * Endian independent macros for shifting bytes within registers. - */ -#ifndef __ARMEB__ -#define pull lsr -#define push lsl -#else -#define pull lsl -#define push lsr -#endif - - .text - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - -ENTRY(memcpy) - - stmfd sp!, {r0, r4, lr} - cfi_adjust_cfa_offset (12) - cfi_rel_offset (r4, 4) - cfi_rel_offset (lr, 8) - - cfi_remember_state - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - cfi_adjust_cfa_offset (16) - cfi_rel_offset (r5, 0) - cfi_rel_offset (r6, 4) - cfi_rel_offset (r7, 8) - cfi_rel_offset (r8, 12) - blt 5f - - CALGN( ands ip, r1, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: nop - ldr r3, [r1], #4 - ldr r4, [r1], #4 - ldr r5, [r1], #4 - ldr r6, [r1], #4 - ldr r7, [r1], #4 - ldr r8, [r1], #4 - ldr lr, [r1], #4 - - add pc, pc, ip - nop - nop - str r3, [r0], #4 - str r4, [r0], #4 - str r5, [r0], #4 - str r6, [r0], #4 - str r7, [r0], #4 - str r8, [r0], #4 - str lr, [r0], #4 - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - cfi_adjust_cfa_offset (-16) - cfi_restore (r5) - cfi_restore (r6) - cfi_restore (r7) - cfi_restore (r8) - -8: movs r2, r2, lsl #31 - ldrneb r3, [r1], #1 - ldrcsb r4, [r1], #1 - ldrcsb ip, [r1] - strneb r3, [r0], #1 - strcsb r4, [r0], #1 - strcsb ip, [r0] - -#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__) - ldmfd sp!, {r0, r4, lr} - cfi_adjust_cfa_offset (-12) - cfi_restore (r4) - cfi_restore (lr) - bx lr -#else - ldmfd sp!, {r0, r4, pc} -#endif - - cfi_restore_state - -9: rsb ip, ip, #4 - cmp ip, #2 - ldrgtb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrb lr, [r1], #1 - strgtb r3, [r0], #1 - strgeb r4, [r0], #1 - subs r2, r2, ip - strb lr, [r0], #1 - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr lr, [r1], #4 - beq 17f - bgt 18f - - - .macro forward_copy_shift pull push - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r1, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - cfi_adjust_cfa_offset (20) - cfi_rel_offset (r5, 0) - cfi_rel_offset (r6, 4) - cfi_rel_offset (r7, 8) - cfi_rel_offset (r8, 12) - cfi_rel_offset (r9, 16) - - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldmia r1!, {r4, r5, r6, r7} - mov r3, lr, pull #\pull - subs r2, r2, #32 - ldmia r1!, {r8, r9, ip, lr} - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - cfi_adjust_cfa_offset (-20) - cfi_restore (r5) - cfi_restore (r6) - cfi_restore (r7) - cfi_restore (r8) - cfi_restore (r9) - -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, pull #\pull - ldr lr, [r1], #4 - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str r3, [r0], #4 - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 - -END(memcpy) diff --git a/android-tools/static-binary/src/memcpy_neon.S b/android-tools/static-binary/src/memcpy_neon.S deleted file mode 100644 index fc8c316..0000000 --- a/android-tools/static-binary/src/memcpy_neon.S +++ /dev/null @@ -1,3 +0,0 @@ -#define MEMCPY_NEON -#define memcpy ad_memcpy_neon -#include "memcpy_impl.S" diff --git a/android-tools/static-binary/src/memcpy_none.S b/android-tools/static-binary/src/memcpy_none.S deleted file mode 100644 index e77d2af..0000000 --- a/android-tools/static-binary/src/memcpy_none.S +++ /dev/null @@ -1,2 +0,0 @@ -#define memcpy ad_memcpy_none -#include "memcpy_impl.S" diff --git a/android-tools/static-binary/src/memcpy_vfp.S b/android-tools/static-binary/src/memcpy_vfp.S deleted file mode 100644 index b17f344..0000000 --- a/android-tools/static-binary/src/memcpy_vfp.S +++ /dev/null @@ -1,3 +0,0 @@ -#define MEMCPY_VFP -#define memcpy ad_memcpy_vfp -#include "memcpy_impl.S" |