diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-03 12:06:09 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-03 12:06:09 -0800 |
commit | 8fd5e7a2d9574b3cac1c9264ad1aed3b613ed6fe (patch) | |
tree | 5696f5d31c6c75b71bfc4852fb234b773e266cfe /arch/metag/lib/memmove.S | |
parent | 529e5fbcd8d3cb48cf824ac8fde91cc80a9e985f (diff) | |
parent | c60ac31542e93499b58dcfc1e3f6550ba5b5728e (diff) |
Merge tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag
Pull new ImgTec Meta architecture from James Hogan:
"This adds core architecture support for Imagination's Meta processor
cores, followed by some later miscellaneous arch/metag cleanups and
fixes which I kept separate to ease review:
- Support for basic Meta 1 (ATP) and Meta 2 (HTP) core architecture
- A few fixes all over, particularly for symbol prefixes
- A few privilege protection fixes
- Several cleanups (setup.c includes, split out a lot of
metag_ksyms.c)
- Fix some missing exports
- Convert hugetlb to use vm_unmapped_area()
- Copy device tree to non-init memory
- Provide dma_get_sgtable()"
* tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag: (61 commits)
metag: Provide dma_get_sgtable()
metag: prom.h: remove declaration of metag_dt_memblock_reserve()
metag: copy devicetree to non-init memory
metag: cleanup metag_ksyms.c includes
metag: move mm/init.c exports out of metag_ksyms.c
metag: move usercopy.c exports out of metag_ksyms.c
metag: move setup.c exports out of metag_ksyms.c
metag: move kick.c exports out of metag_ksyms.c
metag: move traps.c exports out of metag_ksyms.c
metag: move irq enable out of irqflags.h on SMP
genksyms: fix metag symbol prefix on crc symbols
metag: hugetlb: convert to vm_unmapped_area()
metag: export clear_page and copy_page
metag: export metag_code_cache_flush_all
metag: protect more non-MMU memory regions
metag: make TXPRIVEXT bits explicit
metag: kernel/setup.c: sort includes
perf: Enable building perf tools for Meta
metag: add boot time LNKGET/LNKSET check
metag: add __init to metag_cache_probe()
...
Diffstat (limited to 'arch/metag/lib/memmove.S')
-rw-r--r-- | arch/metag/lib/memmove.S | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S new file mode 100644 index 00000000000..228ea04d7b3 --- /dev/null +++ b/arch/metag/lib/memmove.S @@ -0,0 +1,345 @@ +! Copyright (C) 2008-2012 Imagination Technologies Ltd. + + .text + .global _memmove + .type _memmove,function +! D1Ar1 dst +! D0Ar2 src +! D1Ar3 cnt +! D0Re0 dst +_memmove: + CMP D1Ar3, #0 + MOV D0Re0, D1Ar1 + BZ $LEND2 + MSETL [A0StP], D0.5, D0.6, D0.7 + MOV D1Ar5, D0Ar2 + CMP D1Ar1, D1Ar5 + BLT $Lforwards_copy + SUB D0Ar4, D1Ar1, D1Ar3 + ADD D0Ar4, D0Ar4, #1 + CMP D0Ar2, D0Ar4 + BLT $Lforwards_copy + ! should copy backwards + MOV D1Re0, D0Ar2 + ! adjust pointer to the end of mem + ADD D0Ar2, D1Re0, D1Ar3 + ADD D1Ar1, D1Ar1, D1Ar3 + + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lbbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ! test 8 byte alignment + ANDS D1Ar5, D1Ar5, #7 + BNE $Lbdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lbsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lbaligned_loop: + GETL D0Re0, D1Re0, [--A1.2] + SETL [--A0.2], D0Re0, D1Re0 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit +$Lbbyte_loop: + GETB D1Re0, [--A1.2] + SETB [--A0.2], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lbbyte_loop +$Lbbyte_loop_exit: + MOV D0Re0, A0.2 +$LEND: + SUB A0.2, A0StP, #24 + MGETL D0.5, D0.6, D0.7, [A0.2] + SUB A0StP, A0StP, #24 +$LEND2: + MOV PC, D1RtP + +$Lbdest_unaligned: + GETB D0Re0, [--A1.2] + SETB [--A0.2], D0Re0 + SUBS D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + BNE $Lbdest_unaligned + CMP D1Ar3, #8 + BLT $Lbbyte_loop +$Lbsrc_unaligned: + LSR D1Ar5, D1Ar3, #3 + ! adjust A1.2 + MOV D0Ar4, A1.2 + ! save original address + MOV D0Ar6, A1.2 + + ADD D0Ar4, D0Ar4, #7 + ANDMB D0Ar4, D0Ar4, #0xfff8 + ! new address is the 8-byte aligned one above the original + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + ! measure the gap size + SUB D0Ar6, D0Ar4, D0Ar6 + MOVS D0Ar4, D0Ar6 + ! keep this information for the later adjustment + ! both aligned + BZ $Lbaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [--A1.2] + + CMP D0Ar6, #4 + BLT $Lbunaligned_1_2_3 + ! 32-bit aligned + BZ $Lbaligned_4 + + SUB D0Ar6, D0Ar6, #4 + ! D1.6 stores the gap size in bits + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + ! D0.6 stores the complement of the gap size + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_5_6_7: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D1Re0, D0Re0 + ! D1Re0 << gap-size + LSL D1Re0, D1Re0, D1.6 + MOV D0Re0, D1.7 + ! D0Re0 >> complement + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ! combine the both + ADD D1Re0, D1Re0, D1.5 + + MOV D1.5, D1.7 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D0.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ! A1.2 <- A1.2 +8 - gapsize + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbunaligned_1_2_3: + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_1_2_3_loop: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSL D1Re0, D1Re0, D1.6 + ! save D0Re0 for later use + MOV D0.5, D0Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ADD D1Re0, D1Re0, D1.5 + + ! orignal data in D0Re0 + MOV D1.5, D0.5 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D1.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbaligned_4: + GETL D0.7, D1.7, [--A1.2] + MOV D1Re0, D0Re0 + MOV D0Re0, D1.7 + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lforwards_copy: + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lfbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ANDS D1Ar5, D1Ar5, #7 + BNE $Lfdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lfsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lfaligned_loop: + GETL D0Re0, D1Re0, [A1.2++] + SUBS D1Ar5, D1Ar5, #1 + SETL [A0.2++], D0Re0, D1Re0 + BNE $Lfaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit +$Lfbyte_loop: + GETB D1Re0, [A1.2++] + SETB [A0.2++], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lfbyte_loop +$Lfbyte_loop_exit: + MOV D0Re0, D1Ar1 + B $LEND + +$Lfdest_unaligned: + GETB D0Re0, [A1.2++] + ADD D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + SETB [A0.2++], D0Re0 + CMP D1Ar5, #8 + BNE $Lfdest_unaligned + CMP D1Ar3, #8 + BLT $Lfbyte_loop +$Lfsrc_unaligned: + ! adjust A1.2 + LSR D1Ar5, D1Ar3, #3 + + MOV D0Ar4, A1.2 + MOV D0Ar6, A1.2 + ANDMB D0Ar4, D0Ar4, #0xfff8 + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + SUB D0Ar6, D0Ar6, D0Ar4 + ! keep the information for the later adjustment + MOVS D0Ar4, D0Ar6 + + ! both aligned + BZ $Lfaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [A1.2] + + CMP D0Ar6, #4 + BLT $Lfunaligned_1_2_3 + BZ $Lfaligned_4 + + SUB D0Ar6, D0Ar6, #4 + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_5_6_7: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D0.7 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D1.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfunaligned_1_2_3: + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_1_2_3_loop: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D1Re0 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D1.5 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfaligned_4: + GETL D0.7, D1.7, [++A1.2] + MOV D0Re0, D1Re0 + MOV D1Re0, D0.7 + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + + .size _memmove,.-_memmove |