author     Christophe Lyon <christophe.lyon@linaro.org>    2013-06-05 15:02:26 +0000
committer  Christophe Lyon <christophe.lyon@linaro.org>    2013-06-05 15:02:26 +0000
commit     bfa414d888c437f1738fd9b72cc3fa0748e6c8db (patch)
tree       38a68502e271cda972b3b48d7438b3768a270c5a /gcc/config
parent     3970692648055ed4e80ef13533ee57caf46e63fa (diff)
2013-05-23  Christophe Lyon  <christophe.lyon@linaro.org>

	Backport from trunk r198970.
	2013-05-16  Greta Yorsh  <Greta.Yorsh@arm.com>

	* config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
	* config/arm/arm.c (next_consecutive_mem): New function.
	(gen_movmem_ldrd_strd): Likewise.
	* config/arm/arm.md (movmemqi): Update condition and code.
	(unaligned_loaddi, unaligned_storedi): New patterns.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@199696 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/arm/arm-protos.h    1
-rw-r--r--  gcc/config/arm/arm.c         128
-rw-r--r--  gcc/config/arm/arm.md         68
3 files changed, 196 insertions(+), 1 deletion(-)
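
For context (not part of the commit): the new code path only triggers on 32-bit targets when LDRD/STRD are available, the current tuning sets prefer_ldrd_strd, and the function is not optimized for size. A hypothetical C example of the kind of copy the new movmemqi expansion is aimed at (struct name made up for illustration):

#include <string.h>

struct pt { int x, y; };

/* An 8-byte constant-size copy: under the conditions above, a
   candidate for a single LDRD/STRD pair.  */
void
copy_pt (struct pt *dst, const struct pt *src)
{
  memcpy (dst, src, sizeof *src);
}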
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index a6af9275712..c791341f69b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
extern int arm_gen_movmemqi (rtx *);
+extern bool gen_movmem_ldrd_strd (rtx *);
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
HOST_WIDE_INT);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2399423ce9a..75e3cf35a9d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11840,6 +11840,134 @@ arm_gen_movmemqi (rtx *operands)
return 1;
}
+/* Helper for gen_movmem_ldrd_strd.  Advance the address of the memory
+   rtx MEM by the size of its mode.  */
+inline static rtx
+next_consecutive_mem (rtx mem)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
+ rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
+
+ return adjust_automodify_address (mem, mode, addr, offset);
+}
+
+/* Copy using LDRD/STRD instructions whenever possible.
+ Returns true upon success. */
+bool
+gen_movmem_ldrd_strd (rtx *operands)
+{
+ unsigned HOST_WIDE_INT len;
+ HOST_WIDE_INT align;
+ rtx src, dst, base;
+ rtx reg0;
+ bool src_aligned, dst_aligned;
+ bool src_volatile, dst_volatile;
+
+ gcc_assert (CONST_INT_P (operands[2]));
+ gcc_assert (CONST_INT_P (operands[3]));
+
+ len = UINTVAL (operands[2]);
+ if (len > 64)
+ return false;
+
+ /* Maximum alignment we can assume for both src and dst buffers. */
+ align = INTVAL (operands[3]);
+
+ if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
+ return false;
+
+ /* Place src and dst addresses in registers
+ and update the corresponding mem rtx. */
+ dst = operands[0];
+ dst_volatile = MEM_VOLATILE_P (dst);
+ dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
+ base = copy_to_mode_reg (SImode, XEXP (dst, 0));
+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+ src = operands[1];
+ src_volatile = MEM_VOLATILE_P (src);
+ src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
+ base = copy_to_mode_reg (SImode, XEXP (src, 0));
+ src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+ if (!unaligned_access && !(src_aligned && dst_aligned))
+ return false;
+
+ if (src_volatile || dst_volatile)
+ return false;
+
+ /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
+ if (!(dst_aligned || src_aligned))
+ return arm_gen_movmemqi (operands);
+
+ src = adjust_address (src, DImode, 0);
+ dst = adjust_address (dst, DImode, 0);
+ while (len >= 8)
+ {
+ len -= 8;
+ reg0 = gen_reg_rtx (DImode);
+ if (src_aligned)
+ emit_move_insn (reg0, src);
+ else
+ emit_insn (gen_unaligned_loaddi (reg0, src));
+
+ if (dst_aligned)
+ emit_move_insn (dst, reg0);
+ else
+ emit_insn (gen_unaligned_storedi (dst, reg0));
+
+ src = next_consecutive_mem (src);
+ dst = next_consecutive_mem (dst);
+ }
+
+ gcc_assert (len < 8);
+ if (len >= 4)
+ {
+ /* More than a word but less than a double-word to copy. Copy a word. */
+ reg0 = gen_reg_rtx (SImode);
+ src = adjust_address (src, SImode, 0);
+ dst = adjust_address (dst, SImode, 0);
+ if (src_aligned)
+ emit_move_insn (reg0, src);
+ else
+ emit_insn (gen_unaligned_loadsi (reg0, src));
+
+ if (dst_aligned)
+ emit_move_insn (dst, reg0);
+ else
+ emit_insn (gen_unaligned_storesi (dst, reg0));
+
+ src = next_consecutive_mem (src);
+ dst = next_consecutive_mem (dst);
+ len -= 4;
+ }
+
+ if (len == 0)
+ return true;
+
+ /* Copy the remaining bytes. */
+ if (len >= 2)
+ {
+ dst = adjust_address (dst, HImode, 0);
+ src = adjust_address (src, HImode, 0);
+ reg0 = gen_reg_rtx (SImode);
+ emit_insn (gen_unaligned_loadhiu (reg0, src));
+ emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
+ src = next_consecutive_mem (src);
+ dst = next_consecutive_mem (dst);
+ if (len == 2)
+ return true;
+ }
+
+ dst = adjust_address (dst, QImode, 0);
+ src = adjust_address (src, QImode, 0);
+ reg0 = gen_reg_rtx (QImode);
+ emit_move_insn (reg0, src);
+ emit_move_insn (dst, reg0);
+ return true;
+}
+
/* Select a dominance comparison mode if possible for a test of the general
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
COND_OR == DOM_CC_X_AND_Y => (X && Y)
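
Aside (illustrative only, no GCC internals): the copy strategy in gen_movmem_ldrd_strd above is a straightforward size decomposition. A plain-C model of the same logic, with made-up names:

#include <stddef.h>
#include <string.h>

/* Copy LEN (<= 64) bytes the way gen_movmem_ldrd_strd schedules it:
   8-byte chunks first (one LDRD/STRD pair each), then at most one
   4-byte, one 2-byte, and one 1-byte tail copy.  */
static void
copy_like_ldrd_strd (char *dst, const char *src, size_t len)
{
  while (len >= 8)
    { memcpy (dst, src, 8); dst += 8; src += 8; len -= 8; }
  if (len >= 4)
    { memcpy (dst, src, 4); dst += 4; src += 4; len -= 4; }
  if (len >= 2)
    { memcpy (dst, src, 2); dst += 2; src += 2; len -= 2; }
  if (len == 1)
    *dst = *src;
}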
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1026ddeba3b..f2fa54ad3c9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4402,6 +4402,64 @@
(set_attr "predicable" "yes")
(set_attr "type" "store1")])
+;; Unaligned double-word load and store.
+;; Split after reload into two unaligned single-word accesses.
+;; Splitting late prevents lower_subreg from splitting some other
+;; aligned double-word accesses too early.  Used for internal memcpy.
+
+(define_insn_and_split "unaligned_loaddi"
+ [(set (match_operand:DI 0 "s_register_operand" "=l,r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
+ {
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+
+ /* If the first destination register overlaps with the base address,
+ swap the order in which the loads are emitted. */
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ {
+ rtx tmp = operands[1];
+ operands[1] = operands[3];
+ operands[3] = tmp;
+ tmp = operands[0];
+ operands[0] = operands[2];
+ operands[2] = tmp;
+ }
+ }
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load2")])
+
+(define_insn_and_split "unaligned_storedi"
+ [(set (match_operand:DI 0 "memory_operand" "=o,o")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
+ (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
+ {
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store2")])
+
+
(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
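
Note (not part of the patch): ARMv7 allows unaligned LDR/STR when unaligned access is enabled, but LDRD/STRD always require word alignment, hence the post-reload split into two single-word unspecs above. A plain-C sketch of the resulting access pattern, assuming little-endian and a hypothetical helper name:

#include <stdint.h>
#include <string.h>

/* Two 32-bit unaligned loads composed into a 64-bit value, mirroring
   the split unaligned_loaddi.  C has no register-overlap hazard, so
   the load-order swap performed in the RTL split has no counterpart
   here.  */
static uint64_t
load_u64_unaligned (const unsigned char *p)
{
  uint32_t lo, hi;
  memcpy (&lo, p, 4);       /* first unaligned LDR  */
  memcpy (&hi, p + 4, 4);   /* second unaligned LDR */
  return ((uint64_t) hi << 32) | lo;
}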
@@ -7355,10 +7413,18 @@
(match_operand:BLK 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
- "TARGET_EITHER"
+ ""
"
if (TARGET_32BIT)
{
+ if (TARGET_LDRD && current_tune->prefer_ldrd_strd
+ && !optimize_function_for_size_p (cfun))
+ {
+ if (gen_movmem_ldrd_strd (operands))
+ DONE;
+ FAIL;
+ }
+
if (arm_gen_movmemqi (operands))
DONE;
FAIL;
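
Usage note (hypothetical example, not from the commit): gen_movmem_ldrd_strd declines copies longer than 64 bytes, in which case the expander FAILs and the copy is left to the generic block-move code (typically a loop or a memcpy libcall):

/* Too large for the inline LDRD/STRD expansion above; falls back to
   the generic path.  */
void
copy_large (char *dst, const char *src)
{
  __builtin_memcpy (dst, src, 256);
}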