summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYongqin Liu <yongqin.liu@linaro.org>2016-01-06 19:25:56 +0800
committerYongqin Liu <yongqin.liu@linaro.org>2016-01-06 19:25:56 +0800
commit54ff4b4be83521c8cb989d4bf7253a04abaa376e (patch)
treec09435b37fa7182d09d6ace6f407c32757343c6d
parentc5f478ced67e06e5f43a2b948a0061eed7cb327f (diff)
clean for static-binary
Signed-off-by: Yongqin Liu <yongqin.liu@linaro.org>
-rw-r--r--android-tools/static-binary/src/main.c108
-rw-r--r--android-tools/static-binary/src/memcpy_base.S321
-rw-r--r--android-tools/static-binary/src/memcpy_bionic.S127
-rw-r--r--android-tools/static-binary/src/memcpy_impl.S627
-rw-r--r--android-tools/static-binary/src/memcpy_impl_glibc217.S296
-rw-r--r--android-tools/static-binary/src/memcpy_neon.S3
-rw-r--r--android-tools/static-binary/src/memcpy_none.S2
-rw-r--r--android-tools/static-binary/src/memcpy_vfp.S3
8 files changed, 0 insertions, 1487 deletions
diff --git a/android-tools/static-binary/src/main.c b/android-tools/static-binary/src/main.c
deleted file mode 100644
index 7bc2617..0000000
--- a/android-tools/static-binary/src/main.c
+++ /dev/null
@@ -1,108 +0,0 @@
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/time.h>
-
-void *ad_memcpy_neon(void *dest, const void *src, size_t n); /* memcpy variants under test: declared here, not defined in this C file */
-void *ad_memcpy_vfp(void *dest, const void *src, size_t n);
-void *ad_memcpy_none(void *dest, const void *src, size_t n);
-void *ad_memcpy_glibc217(void *dest, const void *src, size_t n);
-void *ad_memcpy_bionic(void *dest, const void *src, size_t n);
-
-#define N_ELEMENTS(x) (sizeof(x) / sizeof((x)[0])) /* element count; only meaningful when x is a real array, not a pointer */
-
-struct bench { /* one benchmark configuration */
- unsigned int len; /* size in bytes of the buffers, and of every copy call */
- unsigned long long loops; /* number of copy calls timed for each implementation */
-};
-
-/* Common signature of every memcpy implementation being benchmarked. */
-typedef void *(*memcpy_fn) (void *dest, const void *src, size_t n);
-
-/*
- * Times bench->loops calls of copy (dst, src, bench->len) with
- * gettimeofday() and prints the elapsed wall-clock time together with the
- * resulting throughput in MiB/s, prefixed by label.
- *
- * dst and src must each point to at least bench->len bytes.
- */
-static void
-run_bench (const char *label, memcpy_fn copy, const struct bench *bench,
-           unsigned char *dst, const unsigned char *src)
-{
-    struct timeval start, end;
-    double usecs;
-    double mbps;
-
-    gettimeofday (&start, NULL);
-    for (unsigned long long left = bench->loops; left != 0; --left)
-        copy (dst, src, bench->len);
-    gettimeofday (&end, NULL);
-    timersub (&end, &start, &end);
-
-    usecs = end.tv_sec * 1000000.0 + end.tv_usec;
-    /* A run shorter than the clock resolution must not divide by zero. */
-    mbps = usecs > 0.0
-        ? (bench->loops * bench->len * 1000000.0) / usecs / 1024 / 1024
-        : 0.0;
-
-    printf (" %s took %2ju.%.6ju s ~ %'.1f MiB/s\n", label,
-            (uintmax_t)end.tv_sec, (uintmax_t)end.tv_usec, mbps);
-}
-
-/*
- * Benchmark driver: for every buffer size in benches[], allocates a source
- * and a destination buffer, fills the source with 0xaa and times each memcpy
- * implementation in impls[] in turn.
- *
- * Returns 0 on success, EXIT_FAILURE if a buffer cannot be allocated.
- * The command-line arguments are not used.
- */
-int
-main (int argc, char *argv[])
-{
-    static const struct bench benches[] = {
-        { .len = 8, .loops = 16000000LLU },
-        { .len = 81, .loops = 8000000LLU },
-        { .len = 8192, .loops = 200000LLU },
-        { .len = 131072, .loops = 5000LLU },
-        { .len = 1048576 * 10, .loops = 10LLU },
-    };
-    /* Implementations are run in this order for every buffer size. */
-    static const struct {
-        const char *label;
-        memcpy_fn copy;
-    } impls[] = {
-        { "memcpy (neon)", ad_memcpy_neon },
-        { "memcpy (vfp)", ad_memcpy_vfp },
-        { "memcpy (none)", ad_memcpy_none },
-        { "memcpy (old arm generic)", ad_memcpy_glibc217 },
-        { "memcpy (bionic)", ad_memcpy_bionic },
-        { "memcpy (curr toolchain - dynamic)", memcpy },
-    };
-
-    (void) argc;
-    (void) argv;
-
-    for (size_t iter = 0; iter < N_ELEMENTS (benches); ++iter) {
-        const struct bench *bench = &benches[iter];
-        unsigned char *src;
-        unsigned char *dst;
-
-        printf ("benchmarking: len: %8u loops: %llu\n", bench->len, bench->loops);
-
-        src = malloc (bench->len);
-        dst = malloc (bench->len);
-        if (src == NULL || dst == NULL) {
-            fprintf (stderr, "cannot allocate two %u-byte buffers\n", bench->len);
-            free (dst);
-            free (src);
-            return EXIT_FAILURE;
-        }
-        memset (src, 0xaa, bench->len);
-
-        for (size_t i = 0; i < N_ELEMENTS (impls); ++i)
-            run_bench (impls[i].label, impls[i].copy, bench, dst, src);
-
-        free (dst);
-        free (src);
-
-        puts ("");
-    }
-
-    return 0;
-}
diff --git a/android-tools/static-binary/src/memcpy_base.S b/android-tools/static-binary/src/memcpy_base.S
deleted file mode 100644
index 721e9bc..0000000
--- a/android-tools/static-binary/src/memcpy_base.S
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Define an entry point visible from C. */
-#define C_LABEL(name) name:
-
-ENTRY_PRIVATE(MEMCPY_BASE) // NEON copy body. In: r0 = dst, r1 = src, r2 = byte count; {r0, lr} are already on the stack (see the CFI below and the final pop).
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- // Assumes that n >= 0, and dst, src are valid pointers.
- // For any sizes less than 832 use the neon code that doesn't
- // care about the src alignment. This avoids any checks
- // for src alignment, and offers the best improvement since
- // smaller sized copies are dominated by the overhead of
- // the pre and post main loop.
- // For larger copies, if src and dst cannot both be aligned to
- // word boundaries, use the neon code.
- // For all other copies, align dst to a double word boundary
- // and copy using LDRD/STRD instructions.
-
- cmp r2, #16
- blo .L_copy_less_than_16_unknown_align // Fewer than 16 bytes: go straight to the tail copy.
-
- // TODO: The aligned copy code is extremely slow copying some large
- // buffers so always go through the unaligned path for now.
- //cmp r2, #832
- //bge .L_check_alignment
-
-.L_copy_unknown_alignment:
- // Unknown alignment of src and dst.
- // Assumes that the first few bytes have already been prefetched.
-
- // Align destination to 128 bits. The mainloop store instructions
- // require this alignment or they will throw an exception.
- rsb r3, r0, #0
- ands r3, r3, #0xF // r3 = (-dst) & 15 = bytes needed to reach 16-byte alignment; zero means dst is already aligned.
- beq 2f
-
- // Copy up to 15 bytes (count in r3).
- sub r2, r2, r3
- movs ip, r3, lsl #31 // N = bit 0 of r3 (copy 1 byte), C = bit 1 of r3 (copy 2 bytes).
-
- itt mi
- ldrmib lr, [r1], #1
- strmib lr, [r0], #1
- itttt cs
- ldrcsb ip, [r1], #1
- ldrcsb lr, [r1], #1
- strcsb ip, [r0], #1
- strcsb lr, [r0], #1
-
- movs ip, r3, lsl #29 // N = bit 2 of r3 (copy 4 bytes), C = bit 3 of r3 (copy 8 bytes).
- bge 1f
- // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
-1: bcc 2f
- // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0, :64]!
-
-2: // Make sure we have at least 64 bytes to copy.
- subs r2, r2, #64 // From here r2 holds (remaining - 64) while the main loop runs.
- blo 2f
-
-1: // The main loop copies 64 bytes at a time.
- vld1.8 {d0 - d3}, [r1]!
- vld1.8 {d4 - d7}, [r1]!
- pld [r1, #(64*4)]
- subs r2, r2, #64
- vst1.8 {d0 - d3}, [r0, :128]!
- vst1.8 {d4 - d7}, [r0, :128]!
- bhs 1b // The NEON stores leave the flags alone, so this still tests the subs above.
-
-2: // Fix-up the remaining count and make sure we have >= 32 bytes left.
- adds r2, r2, #32
- blo 3f
-
- // 32 bytes. These cache lines were already preloaded.
- vld1.8 {d0 - d3}, [r1]!
- sub r2, r2, #32
- vst1.8 {d0 - d3}, [r0, :128]!
-3: // Less than 32 left.
- add r2, r2, #32 // Undo the remaining bias: r2 is the true remaining count (0..31) again.
- tst r2, #0x10
- beq .L_copy_less_than_16_unknown_align
- // Copies 16 bytes, destination 128 bits aligned.
- vld1.8 {d0, d1}, [r1]!
- vst1.8 {d0, d1}, [r0, :128]!
-
-.L_copy_less_than_16_unknown_align:
- // Copy up to 15 bytes (count in r2).
- movs ip, r2, lsl #29 // C = bit 3 of r2 (copy 8 bytes), N = bit 2 of r2 (copy 4 bytes).
- bcc 1f
- vld1.8 {d0}, [r1]!
- vst1.8 {d0}, [r0]!
-1: bge 2f
- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
-
-2: // Copy 0 to 3 bytes: one byte if bit 0 of the count is set, two more if bit 1 is set.
- lsls r2, r2, #31
- itt ne
- ldrneb lr, [r1], #1
- strneb lr, [r0], #1
- itttt cs
- ldrcsb ip, [r1], #1
- ldrcsb lr, [r1]
- strcsb ip, [r0], #1
- strcsb lr, [r0]
-
- pop {r0, pc} // Reload the original dst as the return value and return to the caller.
-
-.L_check_alignment: // NOTE(review): its only branch (the bge near the top) is commented out, so nothing visible reaches this label.
- // If src and dst cannot both be aligned to a word boundary,
- // use the unaligned copy version.
- eor r3, r0, r1
- ands r3, r3, #0x3
- bne .L_copy_unknown_alignment
-END(MEMCPY_BASE) // No return here: when the bne is not taken, execution continues into the routine that follows.
-
-ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED) // LDRD/STRD copy body. In: r0 = dst, r1 = src, r2 = byte count; {r0, lr} are already on the stack. Uses r4-r9 as copy registers.
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-
- // To try and improve performance, stack layout changed,
- // i.e., not keeping the stack looking like users expect
- // (highest numbered register at highest address).
- strd r4, r5, [sp, #-8]!
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r5, 4
- strd r6, r7, [sp, #-8]!
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r6, 0
- .cfi_rel_offset r7, 4 // strd stores the second register of the pair at [sp, #4].
- strd r8, r9, [sp, #-8]!
- .cfi_adjust_cfa_offset 8
- .cfi_rel_offset r8, 0
- .cfi_rel_offset r9, 4
-
- // Optimized for already aligned dst code.
- ands ip, r0, #3 // ip = dst & 3; non-zero means dst is not word aligned.
- bne .L_dst_not_word_aligned
-
-.L_word_aligned:
- // Align the destination buffer to 8 bytes, to make sure double
- // loads and stores don't cross a cache line boundary,
- // as they are then more expensive even if the data is in the cache
- // (require two load/store issue cycles instead of one).
- // If only one of the buffers is not 8 bytes aligned,
- // then it's more important to align dst than src,
- // because there is more penalty for stores
- // than loads that cross a cacheline boundary.
- // This check and realignment are only done if there is >= 832
- // bytes to copy.
-
- // Dst is word aligned, but check if it is already double word aligned.
- ands r3, r0, #4
- beq 1f
- ldr r3, [r1], #4
- str r3, [r0], #4
- sub r2, #4
-
-1: // Can only get here if > 64 bytes to copy, so there is no need to check r2.
- sub r2, #64 // From here r2 holds (remaining - 64) while the main loop runs.
-
-2: // Every loop iteration copies 64 bytes.
- .irp offset, #0, #8, #16, #24, #32
- ldrd r4, r5, [r1, \offset]
- strd r4, r5, [r0, \offset]
- .endr
-
- ldrd r4, r5, [r1, #40]
- ldrd r6, r7, [r1, #48]
- ldrd r8, r9, [r1, #56]
-
- // Keep the pld as far from the next load as possible.
- // The amount to prefetch was determined experimentally using
- // large sizes, and verifying the prefetch size does not affect
- // the smaller copies too much.
- // WARNING: If the ldrd and strd instructions get too far away
- // from each other, performance suffers. Three loads
- // in a row is the best tradeoff.
- pld [r1, #(64*16)]
- strd r4, r5, [r0, #40]
- strd r6, r7, [r0, #48]
- strd r8, r9, [r0, #56]
-
- add r0, r0, #64
- add r1, r1, #64
- subs r2, r2, #64
- bge 2b
-
- // Fix-up the remaining count and make sure we have >= 32 bytes left.
- adds r2, r2, #32
- blo 4f
-
- // Copy 32 bytes. These cache lines were already preloaded.
- .irp offset, #0, #8, #16, #24
- ldrd r4, r5, [r1, \offset]
- strd r4, r5, [r0, \offset]
- .endr
- add r1, r1, #32
- add r0, r0, #32
- sub r2, r2, #32
-4: // Less than 32 left.
- add r2, r2, #32 // Undo the remaining bias: r2 is the true remaining count (0..31) again.
- tst r2, #0x10
- beq 5f
- // Copy 16 bytes.
- .irp offset, #0, #8
- ldrd r4, r5, [r1, \offset]
- strd r4, r5, [r0, \offset]
- .endr
- add r1, r1, #16
- add r0, r0, #16
-
-5: // Copy up to 15 bytes (count in r2).
- movs ip, r2, lsl #29 // C = bit 3 of r2 (copy 8 bytes), N = bit 2 of r2 (copy 4 bytes).
- bcc 1f
- // Copy 8 bytes.
- ldrd r4, r5, [r1], #8
- strd r4, r5, [r0], #8
-1: bge 2f
- // Copy 4 bytes.
- ldr r4, [r1], #4
- str r4, [r0], #4
-2: // Copy 0 to 3 bytes: one byte if bit 0 of the count is set, two more if bit 1 is set.
- lsls r2, r2, #31
- itt ne
- ldrneb lr, [r1], #1
- strneb lr, [r0], #1
- itttt cs
- ldrcsb ip, [r1], #1
- ldrcsb lr, [r1]
- strcsb ip, [r0], #1
- strcsb lr, [r0]
-
- // Restore registers: optimized pop {r0, pc}
- ldrd r8, r9, [sp], #8
- ldrd r6, r7, [sp], #8
- ldrd r4, r5, [sp], #8
- pop {r0, pc} // Reload the original dst as the return value and return to the caller.
-
-.L_dst_not_word_aligned:
- // Align dst to word.
- rsb ip, ip, #4 // ip = 4 - (dst & 3) = number of bytes (1..3) needed to reach word alignment.
- cmp ip, #2
-
- itt gt // ip == 3: copy the first of three bytes.
- ldrgtb lr, [r1], #1
- strgtb lr, [r0], #1
-
- itt ge // ip >= 2: copy one more byte.
- ldrgeb lr, [r1], #1
- strgeb lr, [r0], #1
-
- ldrb lr, [r1], #1 // At least one byte is always copied on this path.
- strb lr, [r0], #1
-
- sub r2, r2, ip
-
- // Src is guaranteed to be at least word aligned by this point.
- b .L_word_aligned
-END(MEMCPY_BASE_ALIGNED)
diff --git a/android-tools/static-binary/src/memcpy_bionic.S b/android-tools/static-binary/src/memcpy_bionic.S
deleted file mode 100644
index 6b3537c..0000000
--- a/android-tools/static-binary/src/memcpy_bionic.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Prototype: void *memcpy (void *dst, const void *src, size_t count).
-
-#define memcpy ad_memcpy_bionic // ENTRY(memcpy) below therefore emits the global symbol ad_memcpy_bionic, the name the C benchmark declares.
-// TODO: Try below?
-//#define __memcpy_base ad_memcpy_bionic
-//#define __memcpy_base_aligned ad_memcpy_bionic
-
-/* Define an entry point visible from C.
- * C_LABEL(name) expands to the bare assembler label `name:`.
- * ENTRY(name) exports `name` as a global function symbol, aligns it with
- * `.align 4`, places the label and opens a CFI procedure whose unwind
- * information is emitted into .debug_frame.
- * END(name) only closes the CFI procedure; it emits neither a return
- * instruction nor a .size directive. */
-#define C_LABEL(name) name:
-
-#define ENTRY(name) \
- .global name; \
- .type name, %function; \
- .align 4; \
- C_LABEL(name) \
- .cfi_sections .debug_frame; \
- .cfi_startproc;
-
-#undef END
-#define END(name) \
- .cfi_endproc;
-
-/* Like ENTRY, but with hidden visibility. */
-#define ENTRY_PRIVATE(f) \
- ENTRY(f); \
- .hidden f
-
- .text
- .syntax unified
- .fpu neon
-
-/*
-ENTRY(__memcpy_chk)
- cmp r2, r3
- bhi __memcpy_chk_fail
-
- // Fall through to memcpy...
-END(__memcpy_chk)
-*/
-
-ENTRY(memcpy) // Public entry stub; `memcpy` is #defined to ad_memcpy_bionic earlier in this file.
- pld [r1, #64] // Prefetch hint for the source, 64 bytes past src (r1).
- push {r0, lr} // Save dst (the return value) and the return address; the copy body ends with pop {r0, pc}.
- .cfi_def_cfa_offset 8
- .cfi_rel_offset r0, 0
- .cfi_rel_offset lr, 4
-END(memcpy) // END emits only .cfi_endproc, so execution continues into the copy body included from memcpy_base.S below.
-
-#define MEMCPY_BASE __memcpy_base
-#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
-#include "memcpy_base.S"
-
-ENTRY_PRIVATE(__memcpy_chk_fail) // NOTE(review): the only reference to this label in this file is inside the commented-out __memcpy_chk.
- // Preserve lr for backtrace.
- push {lr}
- .cfi_def_cfa_offset 4
- .cfi_rel_offset lr, 0
-
- ldr r0, error_message // r0 = offset of error_string relative to (1b + 8), loaded from the literal below.
- ldr r1, error_code // r1 = the event code stored in the literal below.
-1:
- add r0, pc // Turn the offset into the address of error_string; the literal assumes pc reads as 1b + 8 here.
- //bl __fortify_chk_fail
-error_code: // NOTE(review): with the bl above commented out, nothing stops execution from running into these data words.
- .word 80100 //BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
-error_message:
- .word error_string-(1b+8)
-END(__memcpy_chk_fail)
-
- .data
-error_string:
- .string "memcpy: prevented write past end of buffer"
diff --git a/android-tools/static-binary/src/memcpy_impl.S b/android-tools/static-binary/src/memcpy_impl.S
deleted file mode 100644
index f6e06f4..0000000
--- a/android-tools/static-binary/src/memcpy_impl.S
+++ /dev/null
@@ -1,627 +0,0 @@
- /* Copyright (c) 2013, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the name of Linaro Limited nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- */
-
- /*
- This memcpy routine is optimised for Cortex-A15 cores and takes advantage
- of VFP or NEON when built with the appropriate flags.
-
- Assumptions:
-
- ARMv6 (ARMv7-a if using Neon)
- ARM state
- Unaligned accesses
-
- */
-
-/* Define an entry point visible from C.
- * C_LABEL(name) expands to the bare assembler label `name:`.
- * ENTRY(name) exports `name` as a global function symbol, aligns it with
- * `.align 4`, places the label and opens a CFI procedure whose unwind
- * information is emitted into .debug_frame.
- * END(name) only closes the CFI procedure; it emits neither a return
- * instruction nor a .size directive. */
-#define C_LABEL(name) name:
-
-#define ENTRY(name) \
- .global name; \
- .type name, %function; \
- .align 4; \
- C_LABEL(name) \
- .cfi_sections .debug_frame; \
- .cfi_startproc;
-
-#undef END
-#define END(name) \
- .cfi_endproc;
-
-
- .syntax unified
- /* This implementation requires ARM state. */
- .arm
-
-#ifdef MEMCPY_NEON /* Variant selection; presumably MEMCPY_NEON / MEMCPY_VFP are defined by the file that includes this one - TODO confirm. */
-
- .fpu neon
- .arch armv7-a
- # define FRAME_SIZE 4 /* FRAME_SIZE is the stack adjustment made when tmp2 is saved and restored. */
- # define USE_VFP /* The NEON build also enables every USE_VFP code path. */
- # define USE_NEON
-
-#elif defined (MEMCPY_VFP)
-
- .arch armv6
- .fpu vfpv2
- # define FRAME_SIZE 32
- # define USE_VFP
-
-#else
- .arch armv6
- # define FRAME_SIZE 32 /* The GP-register long copy also saves r4-r9 at [sp, #8], [sp, #16] and [sp, #24] in this frame. */
-
-#endif
-
- /* Old versions of GAS incorrectly implement the NEON align semantics. */
- #ifdef BROKEN_ASM_NEON_ALIGN
- #define ALIGN(addr, align) addr,:align
- #else
- #define ALIGN(addr, align) addr:align
- #endif
-
- #define PC_OFFSET 8 /* PC pipeline compensation. */
- #define INSN_SIZE 4 /* Used with PC_OFFSET to compute the offsets for the `add pc, pc, ...` computed jumps. */
-
- /* Call parameters. */
- #define dstin r0
- #define src r1
- #define count r2
-
- /* Locals. */
- #define tmp1 r3
- #define dst ip
- #define tmp2 r10
-
- #ifndef USE_NEON
- /* For bulk copies using GP registers. */
- #define A_l r2 /* Call-clobbered. */
- #define A_h r3 /* Call-clobbered. */
- #define B_l r4
- #define B_h r5
- #define C_l r6
- #define C_h r7
- #define D_l r8
- #define D_h r9
- #endif
-
- /* Number of lines ahead to pre-fetch data. If you change this the code
- below will need adjustment to compensate. */
-
- #define prefetch_lines 5
-
- #ifdef USE_VFP
- .macro cpy_line_vfp vreg, base // Copy one 64-byte line at dst + \base: every vstr writes a value loaded earlier, then the same register is reloaded from src.
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base] // The long-copy code advances src by 32 before its loop, so this load feeds the store at #\base + 32 below.
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] // Refill \vreg from prefetch_lines lines ahead; this read-ahead stands in for a PLD.
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
-
- .macro cpy_tail_vfp vreg, base // Same sequence as cpy_line_vfp without the read-ahead reload of \vreg.
- vstr \vreg, [dst, #\base]
- vldr \vreg, [src, #\base]
- vstr d0, [dst, #\base + 8]
- vldr d0, [src, #\base + 8]
- vstr d1, [dst, #\base + 16]
- vldr d1, [src, #\base + 16]
- vstr d2, [dst, #\base + 24]
- vldr d2, [src, #\base + 24]
- vstr \vreg, [dst, #\base + 32]
- vstr d0, [dst, #\base + 40]
- vldr d0, [src, #\base + 40]
- vstr d1, [dst, #\base + 48]
- vldr d1, [src, #\base + 48]
- vstr d2, [dst, #\base + 56]
- vldr d2, [src, #\base + 56]
- .endm
- #endif
-
- .p2align 6
-ENTRY(memcpy)
-
- mov dst, dstin /* Preserve dstin, we need to return it. */
- cmp count, #64
- bge .Lcpy_not_short
- /* Deal with small copies quickly by dropping straight into the
- exit block. */
-
- .Ltail63unaligned:
- #ifdef USE_NEON
- and tmp1, count, #0x38
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
- vld1.8 {d0}, [src]! /* 14 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 12 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 10 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 8 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 6 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 4 words to go. */
- vst1.8 {d0}, [dst]!
- vld1.8 {d0}, [src]! /* 2 words to go. */
- vst1.8 {d0}, [dst]!
-
- tst count, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
- #else
- /* Copy up to 15 full words of data. May not be aligned. */
- /* Cannot use VFP for unaligned data. */
- and tmp1, count, #0x3c
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
- /* Jump directly into the sequence below at the correct offset. */
- add pc, pc, tmp1, lsl #1
-
- ldr tmp1, [src, #-60] /* 15 words to go. */
- str tmp1, [dst, #-60]
-
- ldr tmp1, [src, #-56] /* 14 words to go. */
- str tmp1, [dst, #-56]
- ldr tmp1, [src, #-52]
- str tmp1, [dst, #-52]
-
- ldr tmp1, [src, #-48] /* 12 words to go. */
- str tmp1, [dst, #-48]
- ldr tmp1, [src, #-44]
- str tmp1, [dst, #-44]
-
- ldr tmp1, [src, #-40] /* 10 words to go. */
- str tmp1, [dst, #-40]
- ldr tmp1, [src, #-36]
- str tmp1, [dst, #-36]
-
- ldr tmp1, [src, #-32] /* 8 words to go. */
- str tmp1, [dst, #-32]
- ldr tmp1, [src, #-28]
- str tmp1, [dst, #-28]
-
- ldr tmp1, [src, #-24] /* 6 words to go. */
- str tmp1, [dst, #-24]
- ldr tmp1, [src, #-20]
- str tmp1, [dst, #-20]
-
- ldr tmp1, [src, #-16] /* 4 words to go. */
- str tmp1, [dst, #-16]
- ldr tmp1, [src, #-12]
- str tmp1, [dst, #-12]
-
- ldr tmp1, [src, #-8] /* 2 words to go. */
- str tmp1, [dst, #-8]
- ldr tmp1, [src, #-4]
- str tmp1, [dst, #-4]
- #endif
-
- lsls count, count, #31
- ldrhcs tmp1, [src], #2
- ldrbne src, [src] /* Src is dead, use as a scratch. */
- strhcs tmp1, [dst], #2
- strbne src, [dst]
- bx lr
-
- .Lcpy_not_short:
- /* At least 64 bytes to copy, but don't know the alignment yet. */
- str tmp2, [sp, #-FRAME_SIZE]!
- and tmp2, src, #7
- and tmp1, dst, #7
- cmp tmp1, tmp2
- bne .Lcpy_notaligned
-
- #ifdef USE_VFP
- /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
- that the FP pipeline is much better at streaming loads and
- stores. This is outside the critical loop. */
- vmov.f32 s0, s0
- #endif
-
- /* SRC and DST have the same mutual 64-bit alignment, but we may
- still need to pre-copy some bytes to get to natural alignment.
- We bring SRC and DST into full 64-bit alignment. */
- lsls tmp2, dst, #29
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src], #1
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst], #1
-
- 1:
- subs tmp2, count, #64 /* Use tmp2 for count. */
- blt .Ltail63aligned
-
- cmp tmp2, #512
- bge .Lcpy_body_long
-
- .Lcpy_body_medium: /* Count in tmp2. */
- #ifdef USE_VFP
- 1:
- vldr d0, [src, #0]
- subs tmp2, tmp2, #64
- vldr d1, [src, #8]
- vstr d0, [dst, #0]
- vldr d0, [src, #16]
- vstr d1, [dst, #8]
- vldr d1, [src, #24]
- vstr d0, [dst, #16]
- vldr d0, [src, #32]
- vstr d1, [dst, #24]
- vldr d1, [src, #40]
- vstr d0, [dst, #32]
- vldr d0, [src, #48]
- vstr d1, [dst, #40]
- vldr d1, [src, #56]
- vstr d0, [dst, #48]
- add src, src, #64
- vstr d1, [dst, #56]
- add dst, dst, #64
- bge 1b
- tst tmp2, #0x3f
- beq .Ldone
-
- .Ltail63aligned: /* Count in tmp2. */
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
-
- vldr d0, [src, #-56] /* 14 words to go. */
- vstr d0, [dst, #-56]
- vldr d0, [src, #-48] /* 12 words to go. */
- vstr d0, [dst, #-48]
- vldr d0, [src, #-40] /* 10 words to go. */
- vstr d0, [dst, #-40]
- vldr d0, [src, #-32] /* 8 words to go. */
- vstr d0, [dst, #-32]
- vldr d0, [src, #-24] /* 6 words to go. */
- vstr d0, [dst, #-24]
- vldr d0, [src, #-16] /* 4 words to go. */
- vstr d0, [dst, #-16]
- vldr d0, [src, #-8] /* 2 words to go. */
- vstr d0, [dst, #-8]
- #else
- sub src, src, #8
- sub dst, dst, #8
- 1:
- ldrd A_l, A_h, [src, #8]
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #16]
- strd A_l, A_h, [dst, #16]
- ldrd A_l, A_h, [src, #24]
- strd A_l, A_h, [dst, #24]
- ldrd A_l, A_h, [src, #32]
- strd A_l, A_h, [dst, #32]
- ldrd A_l, A_h, [src, #40]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #48]
- strd A_l, A_h, [dst, #48]
- ldrd A_l, A_h, [src, #56]
- strd A_l, A_h, [dst, #56]
- ldrd A_l, A_h, [src, #64]!
- strd A_l, A_h, [dst, #64]!
- subs tmp2, tmp2, #64
- bge 1b
- tst tmp2, #0x3f
- bne 1f
- ldr tmp2,[sp], #FRAME_SIZE
- bx lr
- 1:
- add src, src, #8
- add dst, dst, #8
-
- .Ltail63aligned: /* Count in tmp2. */
- /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
- we know that the src and dest are 64-bit aligned so we can use
- LDRD/STRD to improve efficiency. */
- /* TMP2 is now negative, but we don't care about that. The bottom
- six bits still tell us how many bytes are left to copy. */
-
- and tmp1, tmp2, #0x38
- add dst, dst, tmp1
- add src, src, tmp1
- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
- add pc, pc, tmp1
- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
- strd A_l, A_h, [dst, #-56]
- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
- strd A_l, A_h, [dst, #-48]
- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
- strd A_l, A_h, [dst, #-40]
- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
- strd A_l, A_h, [dst, #-32]
- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
- strd A_l, A_h, [dst, #-24]
- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
- strd A_l, A_h, [dst, #-16]
- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
- strd A_l, A_h, [dst, #-8]
-
- #endif
- tst tmp2, #4
- ldrne tmp1, [src], #4
- strne tmp1, [dst], #4
- lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
- ldrhcs tmp1, [src], #2
- ldrbne tmp2, [src]
- strhcs tmp1, [dst], #2
- strbne tmp2, [dst]
-
- .Ldone:
- ldr tmp2, [sp], #FRAME_SIZE
- bx lr
-
- .Lcpy_body_long: /* Count in tmp2. */
-
- /* Long copy. We know that there's at least (prefetch_lines * 64)
- bytes to go. */
- #ifdef USE_VFP
- /* Don't use PLD. Instead, read some data in advance of the current
- copy position into a register. This should act like a PLD
- operation but we won't have to repeat the transfer. */
-
- vldr d3, [src, #0]
- vldr d4, [src, #64]
- vldr d5, [src, #128]
- vldr d6, [src, #192]
- vldr d7, [src, #256]
-
- vldr d0, [src, #8]
- vldr d1, [src, #16]
- vldr d2, [src, #24]
- add src, src, #32
-
- subs tmp2, tmp2, #prefetch_lines * 64 * 2
- blt 2f
- 1:
- cpy_line_vfp d3, 0
- cpy_line_vfp d4, 64
- cpy_line_vfp d5, 128
- add dst, dst, #3 * 64
- add src, src, #3 * 64
- cpy_line_vfp d6, 0
- cpy_line_vfp d7, 64
- add dst, dst, #2 * 64
- add src, src, #2 * 64
- subs tmp2, tmp2, #prefetch_lines * 64
- bge 1b
-
- 2:
- cpy_tail_vfp d3, 0
- cpy_tail_vfp d4, 64
- cpy_tail_vfp d5, 128
- add src, src, #3 * 64
- add dst, dst, #3 * 64
- cpy_tail_vfp d6, 0
- vstr d7, [dst, #64]
- vldr d7, [src, #64]
- vstr d0, [dst, #64 + 8]
- vldr d0, [src, #64 + 8]
- vstr d1, [dst, #64 + 16]
- vldr d1, [src, #64 + 16]
- vstr d2, [dst, #64 + 24]
- vldr d2, [src, #64 + 24]
- vstr d7, [dst, #64 + 32]
- add src, src, #96
- vstr d0, [dst, #64 + 40]
- vstr d1, [dst, #64 + 48]
- vstr d2, [dst, #64 + 56]
- add dst, dst, #128
- add tmp2, tmp2, #prefetch_lines * 64
- b .Lcpy_body_medium
- #else
- /* Long copy. Use an SMS style loop to maximize the I/O
- bandwidth of the core. We don't have enough spare registers
- to synthesise prefetching, so use PLD operations. */
- /* Pre-bias src and dst. */
- sub src, src, #8
- sub dst, dst, #8
- pld [src, #8]
- pld [src, #72]
- subs tmp2, tmp2, #64
- pld [src, #136]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [sp, #8]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- pld [src, #200]
- ldrd D_l, D_h, [src, #32]!
- b 1f
- .p2align 6
- 2:
- pld [src, #232]
- strd A_l, A_h, [dst, #40]
- ldrd A_l, A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldrd D_l, D_h, [src, #64]!
- subs tmp2, tmp2, #64
- 1:
- strd A_l, A_h, [dst, #8]
- ldrd A_l, A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldrd B_l, B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldrd C_l, C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldrd D_l, D_h, [src, #32]
- bcs 2b
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #40
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- tst tmp2, #0x3f
- bne .Ltail63aligned
- ldr tmp2, [sp], #FRAME_SIZE
- bx lr
- #endif
-
- .Lcpy_notaligned:
- pld [src]
- pld [src, #64]
- /* There's at least 64 bytes to copy, but there is no mutual
- alignment. */
- /* Bring DST to 64-bit alignment. */
- lsls tmp2, dst, #29
- pld [src, #(2 * 64)]
- beq 1f
- rsbs tmp2, tmp2, #0
- sub count, count, tmp2, lsr #29
- ldrmi tmp1, [src], #4
- strmi tmp1, [dst], #4
- lsls tmp2, tmp2, #2
- ldrbne tmp1, [src], #1
- ldrhcs tmp2, [src], #2
- strbne tmp1, [dst], #1
- strhcs tmp2, [dst], #2
- 1:
- pld [src, #(3 * 64)]
- subs count, count, #64
- ldrmi tmp2, [sp], #FRAME_SIZE
- bmi .Ltail63unaligned
- pld [src, #(4 * 64)]
-
- #ifdef USE_NEON
- vld1.8 {d0-d3}, [src]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bmi 2f
- 1:
- pld [src, #(4 * 64)]
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vld1.8 {d0-d3}, [src]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- vld1.8 {d4-d7}, [src]!
- subs count, count, #64
- bpl 1b
- 2:
- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
- ands count, count, #0x3f
- #else
- /* Use an SMS style loop to maximize the I/O bandwidth. */
- sub src, src, #4
- sub dst, dst, #8
- subs tmp2, count, #64 /* Use tmp2 for count. */
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [sp, #8]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [sp, #16]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [sp, #24]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]!
- b 1f
- .p2align 6
- 2:
- pld [src, #(5 * 64) - (32 - 4)]
- strd A_l, A_h, [dst, #40]
- ldr A_l, [src, #36]
- ldr A_h, [src, #40]
- strd B_l, B_h, [dst, #48]
- ldr B_l, [src, #44]
- ldr B_h, [src, #48]
- strd C_l, C_h, [dst, #56]
- ldr C_l, [src, #52]
- ldr C_h, [src, #56]
- strd D_l, D_h, [dst, #64]!
- ldr D_l, [src, #60]
- ldr D_h, [src, #64]!
- subs tmp2, tmp2, #64
- 1:
- strd A_l, A_h, [dst, #8]
- ldr A_l, [src, #4]
- ldr A_h, [src, #8]
- strd B_l, B_h, [dst, #16]
- ldr B_l, [src, #12]
- ldr B_h, [src, #16]
- strd C_l, C_h, [dst, #24]
- ldr C_l, [src, #20]
- ldr C_h, [src, #24]
- strd D_l, D_h, [dst, #32]
- ldr D_l, [src, #28]
- ldr D_h, [src, #32]
- bcs 2b
-
- /* Save the remaining bytes and restore the callee-saved regs. */
- strd A_l, A_h, [dst, #40]
- add src, src, #36
- strd B_l, B_h, [dst, #48]
- ldrd B_l, B_h, [sp, #8]
- strd C_l, C_h, [dst, #56]
- ldrd C_l, C_h, [sp, #16]
- strd D_l, D_h, [dst, #64]
- ldrd D_l, D_h, [sp, #24]
- add dst, dst, #72
- ands count, tmp2, #0x3f
- #endif
- ldr tmp2, [sp], #FRAME_SIZE
- bne .Ltail63unaligned
- bx lr
-
- END(memcpy)
diff --git a/android-tools/static-binary/src/memcpy_impl_glibc217.S b/android-tools/static-binary/src/memcpy_impl_glibc217.S
deleted file mode 100644
index 5bb8fad..0000000
--- a/android-tools/static-binary/src/memcpy_impl_glibc217.S
+++ /dev/null
@@ -1,296 +0,0 @@
-/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#define memcpy ad_memcpy_glibc217  /* assemble the routine below under the name main.c declares */
-/* Function-style spellings of the assembler's .cfi_* unwind directives, used by the code below. */
-# define cfi_startproc .cfi_startproc
-# define cfi_endproc .cfi_endproc
-# define cfi_def_cfa(reg, off) .cfi_def_cfa reg, off
-# define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
-# define cfi_def_cfa_offset(off) .cfi_def_cfa_offset off
-# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
-# define cfi_offset(reg, off) .cfi_offset reg, off
-# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
-# define cfi_register(r1, r2) .cfi_register r1, r2
-# define cfi_return_column(reg) .cfi_return_column reg
-# define cfi_restore(reg) .cfi_restore reg
-# define cfi_same_value(reg) .cfi_same_value reg
-# define cfi_undefined(reg) .cfi_undefined reg
-# define cfi_remember_state .cfi_remember_state
-# define cfi_restore_state .cfi_restore_state
-# define cfi_window_save .cfi_window_save
-# define cfi_personality(enc, exp) .cfi_personality enc, exp
-# define cfi_lsda(enc, exp) .cfi_lsda enc, exp
-
-/* Define an entry point visible from C. */
-#define C_LABEL(name) name:
-/* ENTRY(name): export name as a %function symbol, align, place the label and open a CFI region recorded in .debug_frame.  NOTE(review): GNU as on ARM reads ".align 4" as 2^4 = 16 bytes - confirm. */
-#define ENTRY(name) \
- .global name; \
- .type name, %function; \
- .align 4; \
- C_LABEL(name) \
- .cfi_sections .debug_frame; \
- cfi_startproc;
-/* END(name): only closes the CFI region opened by ENTRY; name is unused and no .size directive is emitted. */
-#undef END
-#define END(name) \
- cfi_endproc;
-
-/*
- * Data preload (ARM V5TE and above): PLD(x) emits x, or nothing when one of the older __ARM_ARCH_* macros tested below is defined.
- */
-#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
- && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
- && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
- && !defined (__ARM_ARCH_5T__))
-#define PLD(code...) code
-#else
-#define PLD(code...)
-#endif
-
-/*
- * CALGN() wraps code that cacheline-aligns the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this to be worthwhile (that might be different in the future), so it expands to nothing; swap the two defines below to enable it.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
-/*
- * Endian-independent shift names: pull is lsr and push is lsl on little-endian builds, swapped when __ARMEB__ is defined.
- */
-#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
-#else
-#define pull lsl
-#define push lsr
-#endif
-
- .text
-
-/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
-/* Register use below: r0 = store pointer, r1 = load pointer, r2 = byte count; the entry value of r0 is popped back into r0 on exit. */
-ENTRY(memcpy)
-/* Push the entry r0 (reloaded on exit) together with r4 and lr, which the copy code uses as scratch. */
- stmfd sp!, {r0, r4, lr}
- cfi_adjust_cfa_offset (12)
- cfi_rel_offset (r4, 4)
- cfi_rel_offset (lr, 8)
-/* Snapshot the unwind state; cfi_restore_state reinstates it after the return sequence. */
- cfi_remember_state
-/* r2 = count - 4.  Under 4 bytes: byte tail at 8.  Otherwise dispatch on dest (9), then src (10), word alignment. */
- subs r2, r2, #4
- blt 8f
- ands ip, r0, #3
- PLD( pld [r1, #0] )
- bne 9f
- ands ip, r1, #3
- bne 10f
-/* Both pointers word aligned.  r2 = bytes left - 32 and r5-r8 are saved for the block loop; a negative r2 skips to the sub-32-byte code at 5. */
-1: subs r2, r2, #(28)
- stmfd sp!, {r5 - r8}
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (r5, 0)
- cfi_rel_offset (r6, 4)
- cfi_rel_offset (r7, 8)
- cfi_rel_offset (r8, 12)
- blt 5f
-/* CALGN() is defined empty in this file, so the source-alignment sequence below assembles to nothing. */
- CALGN( ands ip, r1, #31 )
- CALGN( rsb r3, ip, #32 )
- CALGN( sbcnes r4, r3, r2 ) @ C is always set here
- CALGN( bcs 2f )
- CALGN( adr r4, 6f )
- CALGN( subs r2, r2, r3 ) @ C gets set
- CALGN( add pc, r4, ip )
-/* With PLD() enabled: bias r2 by a further 96 and prime the prefetches; if that goes negative, enter the loop at 4, past its pld. */
- PLD( pld [r1, #0] )
-2: PLD( subs r2, r2, #96 )
- PLD( pld [r1, #28] )
- PLD( blt 4f )
- PLD( pld [r1, #60] )
- PLD( pld [r1, #92] )
-/* Block loop: move 32 bytes per pass through eight registers. */
-3: PLD( pld [r1, #124] )
-4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
- subs r2, r2, #32
- stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
- bge 3b
- PLD( cmn r2, #96 )  /* r2 + 96 >= 0 means at least 32 bytes remain: loop again without the pld */
- PLD( bge 4b )
-/* Under 32 bytes left.  The two computed jumps below rely on pc reading as the current instruction + 8 (ARM state). */
-5: ands ip, r2, #28  /* ip = bytes left in whole words (0..28) */
- rsb ip, ip, #32  /* ip = byte offset to skip in each ladder below */
- addne pc, pc, ip @ C is always clear here
- b 7f  /* no whole words left */
-6: nop
- ldr r3, [r1], #4
- ldr r4, [r1], #4
- ldr r5, [r1], #4
- ldr r6, [r1], #4
- ldr r7, [r1], #4
- ldr r8, [r1], #4
- ldr lr, [r1], #4
-/* Skip the same ip bytes of the store ladder, so exactly the registers just loaded are stored. */
- add pc, pc, ip
- nop
- nop
- str r3, [r0], #4
- str r4, [r0], #4
- str r5, [r0], #4
- str r6, [r0], #4
- str r7, [r0], #4
- str r8, [r0], #4
- str lr, [r0], #4
-
- CALGN( bcs 2b )
-/* Restore r5-r8 pushed at 1. */
-7: ldmfd sp!, {r5 - r8}
- cfi_adjust_cfa_offset (-16)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
- cfi_restore (r8)
-/* Byte tail (0-3 bytes).  r2 differs from the bytes left by a multiple of 4; the shift makes NE reflect bit 0 (one byte) and C reflect bit 1 (two bytes). */
-8: movs r2, r2, lsl #31
- ldrneb r3, [r1], #1
- ldrcsb r4, [r1], #1
- ldrcsb ip, [r1]
- strneb r3, [r0], #1
- strcsb r4, [r0], #1
- strcsb ip, [r0]
-/* Return, popping the saved entry r0 back into r0; the interworking variant pops lr and uses bx instead of popping pc. */
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
- ldmfd sp!, {r0, r4, lr}
- cfi_adjust_cfa_offset (-12)
- cfi_restore (r4)
- cfi_restore (lr)
- bx lr
-#else
- ldmfd sp!, {r0, r4, pc}
-#endif
-/* The out-of-line paths below run with the frame as it was after the prologue. */
- cfi_restore_state
-/* dest not word aligned (ip = r0 & 3): copy 4 - ip bytes (1 to 3) to align it, then re-dispatch on the source alignment. */
-9: rsb ip, ip, #4
- cmp ip, #2
- ldrgtb r3, [r1], #1
- ldrgeb r4, [r1], #1
- ldrb lr, [r1], #1
- strgtb r3, [r0], #1
- strgeb r4, [r0], #1
- subs r2, r2, ip
- strb lr, [r0], #1
- blt 8b  /* fewer than 4 bytes left */
- ands ip, r1, #3
- beq 1b
-/* src not word aligned (ip = r1 & 3, 1 to 3): round r1 down, preload the first word into lr and pick the shift pair for this offset. */
-10: bic r1, r1, #3
- cmp ip, #2
- ldr lr, [r1], #4
- beq 17f  /* offset 2 */
- bgt 18f  /* offset 3; offset 1 falls through */
-
-
- .macro forward_copy_shift pull push  /* NOTE(review): pull/push are also cpp macros, so cpp presumably renames these parameters consistently in header, body and invocations - confirm */
-/* Copy to the word-aligned dest from a misaligned source: each stored word is (previous word pull #pull) | (next word push #push), with lr carrying the previous word. */
- subs r2, r2, #28
- blt 14f  /* under 32 bytes left: skip the block loop */
-/* CALGN() is defined empty in this file, so the next five lines assemble to nothing. */
- CALGN( ands ip, r1, #31 )
- CALGN( rsb ip, ip, #32 )
- CALGN( sbcnes r4, ip, r2 ) @ C is always set here
- CALGN( subcc r2, r2, ip )
- CALGN( bcc 15f )
-/* r5-r9 are saved as extra scratch for the 8-word block loop. */
-11: stmfd sp!, {r5 - r9}
- cfi_adjust_cfa_offset (20)
- cfi_rel_offset (r5, 0)
- cfi_rel_offset (r6, 4)
- cfi_rel_offset (r7, 8)
- cfi_rel_offset (r8, 12)
- cfi_rel_offset (r9, 16)
-/* With PLD() enabled: prime the prefetches and bias r2 by a further 96; if that goes negative, enter the loop at 13, past its pld. */
- PLD( pld [r1, #0] )
- PLD( subs r2, r2, #96 )
- PLD( pld [r1, #28] )
- PLD( blt 13f )
- PLD( pld [r1, #60] )
- PLD( pld [r1, #92] )
-/* Block loop: load 8 source words, merge each with its predecessor, store 8 aligned words; lr ends up holding the newest source word. */
-12: PLD( pld [r1, #124] )
-13: ldmia r1!, {r4, r5, r6, r7}
- mov r3, lr, pull #\pull
- subs r2, r2, #32
- ldmia r1!, {r8, r9, ip, lr}
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
- stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
- bge 12b
- PLD( cmn r2, #96 )  /* r2 + 96 >= 0 means at least 32 bytes remain: loop again without the pld */
- PLD( bge 13b )
-/* Block loop finished: restore r5-r9. */
- ldmfd sp!, {r5 - r9}
- cfi_adjust_cfa_offset (-20)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
- cfi_restore (r8)
- cfi_restore (r9)
-/* ip = bytes left in whole words (0..28); zero goes straight to the fix-up at 16. */
-14: ands ip, r2, #28
- beq 16f
-/* Word-at-a-time shifted copy for the remaining whole words. */
-15: mov r3, lr, pull #\pull
- ldr lr, [r1], #4
- subs ip, ip, #4
- orr r3, r3, lr, push #\push
- str r3, [r0], #4
- bgt 15b
- CALGN( cmp r2, #0 )
- CALGN( bge 11b )
-/* lr still holds push/8 source bytes that have not been stored: step r1 back to the first of them and finish in the byte tail at 8. */
-16: sub r1, r1, #(\push / 8)
- b 8b
-
- .endm
-
-
- forward_copy_shift pull=8 push=24  /* source offset 1 (fall-through from 10) */
-
-17: forward_copy_shift pull=16 push=16  /* source offset 2 */
-
-18: forward_copy_shift pull=24 push=8  /* source offset 3 */
-
-END(memcpy)
diff --git a/android-tools/static-binary/src/memcpy_neon.S b/android-tools/static-binary/src/memcpy_neon.S
deleted file mode 100644
index fc8c316..0000000
--- a/android-tools/static-binary/src/memcpy_neon.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define MEMCPY_NEON  /* NOTE(review): presumably selects the NEON code paths in memcpy_impl.S - confirm there */
-#define memcpy ad_memcpy_neon  /* assemble the shared routine under the name main.c declares */
-#include "memcpy_impl.S"  /* shared memcpy body */
diff --git a/android-tools/static-binary/src/memcpy_none.S b/android-tools/static-binary/src/memcpy_none.S
deleted file mode 100644
index e77d2af..0000000
--- a/android-tools/static-binary/src/memcpy_none.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define memcpy ad_memcpy_none  /* assemble the shared routine under the name main.c declares; no MEMCPY_* selector is defined here */
-#include "memcpy_impl.S"  /* shared memcpy body */
diff --git a/android-tools/static-binary/src/memcpy_vfp.S b/android-tools/static-binary/src/memcpy_vfp.S
deleted file mode 100644
index b17f344..0000000
--- a/android-tools/static-binary/src/memcpy_vfp.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define MEMCPY_VFP  /* NOTE(review): presumably selects the VFP code paths in memcpy_impl.S - confirm there */
-#define memcpy ad_memcpy_vfp  /* assemble the shared routine under the name main.c declares */
-#include "memcpy_impl.S"  /* shared memcpy body */