author     Christophe Lyon <christophe.lyon@linaro.org>    2013-06-05 15:02:26 +0000
committer  Christophe Lyon <christophe.lyon@linaro.org>    2013-06-05 15:02:26 +0000
commit     bfa414d888c437f1738fd9b72cc3fa0748e6c8db
tree       38a68502e271cda972b3b48d7438b3768a270c5a /gcc/config
parent     3970692648055ed4e80ef13533ee57caf46e63fa
2013-05-23 Christophe Lyon <christophe.lyon@linaro.org>
Backport from trunk r198970.
2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
* config/arm/arm.c (next_consecutive_mem): New function.
(gen_movmem_ldrd_strd): Likewise.
* config/arm/arm.md (movmemqi): Update condition and code.
(unaligned_loaddi, unaligned_storedi): New patterns.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@199696 138bc75d-0d04-0410-961f-82ee72b054a4
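
As a rough illustration (not part of the patch itself): the new gen_movmem_ldrd_strd path applies to block copies with a compile-time constant length of at most 64 bytes, on cores whose tuning sets prefer_ldrd_strd, and only when the function is not optimized for size. A minimal C sketch of the kind of copy it targets follows; the function name and any particular -mcpu choice are assumptions for the example only.

/* Hypothetical example, not from the commit: a fixed-size copy small enough
   (16 <= 64 bytes) for the new LDRD/STRD expansion path.  Assumes an ARM
   target whose tuning sets prefer_ldrd_strd and compilation without -Os.  */
#include <string.h>

void
copy_block (void *dst, const void *src)
{
  memcpy (dst, src, 16);  /* constant length goes through the movmemqi expander */
}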
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/arm/arm-protos.h    1
-rw-r--r--  gcc/config/arm/arm.c         128
-rw-r--r--  gcc/config/arm/arm.md         68
3 files changed, 196 insertions, 1 deletion
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index a6af9275712..c791341f69b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
 extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
 extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
 extern int arm_gen_movmemqi (rtx *);
+extern bool gen_movmem_ldrd_strd (rtx *);
 extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
 extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
                                                        HOST_WIDE_INT);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2399423ce9a..75e3cf35a9d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11840,6 +11840,134 @@ arm_gen_movmemqi (rtx *operands)
   return 1;
 }
 
+/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
+by mode size.  */
+inline static rtx
+next_consecutive_mem (rtx mem)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
+  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
+
+  return adjust_automodify_address (mem, mode, addr, offset);
+}
+
+/* Copy using LDRD/STRD instructions whenever possible.
+   Returns true upon success. */
+bool
+gen_movmem_ldrd_strd (rtx *operands)
+{
+  unsigned HOST_WIDE_INT len;
+  HOST_WIDE_INT align;
+  rtx src, dst, base;
+  rtx reg0;
+  bool src_aligned, dst_aligned;
+  bool src_volatile, dst_volatile;
+
+  gcc_assert (CONST_INT_P (operands[2]));
+  gcc_assert (CONST_INT_P (operands[3]));
+
+  len = UINTVAL (operands[2]);
+  if (len > 64)
+    return false;
+
+  /* Maximum alignment we can assume for both src and dst buffers.  */
+  align = INTVAL (operands[3]);
+
+  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
+    return false;
+
+  /* Place src and dst addresses in registers
+     and update the corresponding mem rtx.  */
+  dst = operands[0];
+  dst_volatile = MEM_VOLATILE_P (dst);
+  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
+  dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+  src = operands[1];
+  src_volatile = MEM_VOLATILE_P (src);
+  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
+  base = copy_to_mode_reg (SImode, XEXP (src, 0));
+  src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+  if (!unaligned_access && !(src_aligned && dst_aligned))
+    return false;
+
+  if (src_volatile || dst_volatile)
+    return false;
+
+  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
+  if (!(dst_aligned || src_aligned))
+    return arm_gen_movmemqi (operands);
+
+  src = adjust_address (src, DImode, 0);
+  dst = adjust_address (dst, DImode, 0);
+  while (len >= 8)
+    {
+      len -= 8;
+      reg0 = gen_reg_rtx (DImode);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loaddi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storedi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+    }
+
+  gcc_assert (len < 8);
+  if (len >= 4)
+    {
+      /* More than a word but less than a double-word to copy.  Copy a word.  */
+      reg0 = gen_reg_rtx (SImode);
+      src = adjust_address (src, SImode, 0);
+      dst = adjust_address (dst, SImode, 0);
+      if (src_aligned)
+        emit_move_insn (reg0, src);
+      else
+        emit_insn (gen_unaligned_loadsi (reg0, src));
+
+      if (dst_aligned)
+        emit_move_insn (dst, reg0);
+      else
+        emit_insn (gen_unaligned_storesi (dst, reg0));
+
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      len -= 4;
+    }
+
+  if (len == 0)
+    return true;
+
+  /* Copy the remaining bytes.  */
+  if (len >= 2)
+    {
+      dst = adjust_address (dst, HImode, 0);
+      src = adjust_address (src, HImode, 0);
+      reg0 = gen_reg_rtx (SImode);
+      emit_insn (gen_unaligned_loadhiu (reg0, src));
+      emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
+      src = next_consecutive_mem (src);
+      dst = next_consecutive_mem (dst);
+      if (len == 2)
+        return true;
+    }
+
+  dst = adjust_address (dst, QImode, 0);
+  src = adjust_address (src, QImode, 0);
+  reg0 = gen_reg_rtx (QImode);
+  emit_move_insn (reg0, src);
+  emit_move_insn (dst, reg0);
+  return true;
+}
+
 /* Select a dominance comparison mode if possible for a test of the
    general form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
    COND_OR == DOM_CC_X_AND_Y => (X && Y)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1026ddeba3b..f2fa54ad3c9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4402,6 +4402,64 @@
    (set_attr "predicable" "yes")
    (set_attr "type" "store1")])
 
+;; Unaligned double-word load and store.
+;; Split after reload into two unaligned single-word accesses.
+;; It prevents lower_subreg from splitting some other aligned
+;; double-word accesses too early. Used for internal memcpy.
+
+(define_insn_and_split "unaligned_loaddi"
+  [(set (match_operand:DI 0 "s_register_operand" "=l,r")
+        (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
+                   UNSPEC_UNALIGNED_LOAD))]
+  "unaligned_access && TARGET_32BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
+   (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
+  {
+    operands[2] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[3] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+
+    /* If the first destination register overlaps with the base address,
+       swap the order in which the loads are emitted.  */
+    if (reg_overlap_mentioned_p (operands[0], operands[1]))
+      {
+        rtx tmp = operands[1];
+        operands[1] = operands[3];
+        operands[3] = tmp;
+        tmp = operands[0];
+        operands[0] = operands[2];
+        operands[2] = tmp;
+      }
+  }
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "4,8")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load2")])
+
+(define_insn_and_split "unaligned_storedi"
+  [(set (match_operand:DI 0 "memory_operand" "=o,o")
+        (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
+                   UNSPEC_UNALIGNED_STORE))]
+  "unaligned_access && TARGET_32BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
+   (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
+  {
+    operands[2] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[3] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+  }
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "4,8")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "store2")])
+
+
 (define_insn "*extv_reg"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
         (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
@@ -7355,10 +7413,18 @@
    (match_operand:BLK 1 "general_operand" "")
    (match_operand:SI 2 "const_int_operand" "")
    (match_operand:SI 3 "const_int_operand" "")]
-  "TARGET_EITHER"
+  ""
   "
   if (TARGET_32BIT)
     {
+      if (TARGET_LDRD && current_tune->prefer_ldrd_strd
+          && !optimize_function_for_size_p (cfun))
+        {
+          if (gen_movmem_ldrd_strd (operands))
+            DONE;
+          FAIL;
+        }
+
       if (arm_gen_movmemqi (operands))
         DONE;
       FAIL;
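
For intuition about the tail handling in gen_movmem_ldrd_strd above, here is a sketch of a copy whose length exercises every step: 15 bytes decompose into one double-word (8), one word (4), one half-word (2) and one final byte (1), matching the DImode loop and the SImode/HImode/QImode tail in the new function. The struct and function names are invented for the example.

/* Hypothetical illustration: a 15-byte copy, which the new expansion would
   split as 8 + 4 + 2 + 1, mirroring the DImode/SImode/HImode/QImode steps
   in gen_movmem_ldrd_strd.  Names here are invented for the example.  */
#include <string.h>

struct packet { unsigned char bytes[15]; };

void
copy_packet (struct packet *dst, const struct packet *src)
{
  memcpy (dst, src, sizeof *src);  /* sizeof *src == 15 == 8 + 4 + 2 + 1 */
}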