author     Christophe Lyon <christophe.lyon@linaro.org>    2013-04-02 16:33:00 +0200
committer  Christophe Lyon <christophe.lyon@linaro.org>    2013-04-02 16:33:00 +0200
commit     6238849811847b080577091a4a6b6c6658429716 (patch)
tree       94e647b56a07e68c11fca963b0917f26b401f22b
parent     3cf22210d185be694cb06509170ac694fbc40834 (diff)
parent     00254af064643964e4c784a2ee2c19ea7f14e61a (diff)
Backport "Turn off 64bits ops in Neon" from mainline r196876.
-rw-r--r--  ChangeLog.linaro                                    34
-rw-r--r--  gcc/config/arm/arm-protos.h                          2
-rw-r--r--  gcc/config/arm/arm.c                                50
-rw-r--r--  gcc/config/arm/arm.h                                 7
-rw-r--r--  gcc/config/arm/arm.md                               24
-rw-r--r--  gcc/config/arm/arm.opt                               4
-rw-r--r--  gcc/config/arm/neon.md                              14
-rw-r--r--  gcc/doc/invoke.texi                                  8
-rw-r--r--  gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c    54
-rw-r--r--  gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c    57
10 files changed, 220 insertions, 34 deletions
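For context, the patch below adds the -mneon-for-64bits option, which controls whether scalar 64-bit integer operations are performed in Neon D registers or in core register pairs. The sketch that follows is illustrative only and is not part of the patch; the exact assembly depends on the selected CPU, FPU and tuning:

/* A 64-bit add kept in core registers is normally split into an
   adds/adc pair; with -mneon-for-64bits (and a Neon-capable -mfpu)
   the backend is instead allowed to select a single vadd.i64 on a
   D register, at the cost of moving the operands into Neon.  */
void
add64 (unsigned long long *a, unsigned long long *b, unsigned long long *c)
{
  *a = *b + *c;   /* default: adds/adc on r registers; with the new
                     option: expected to become vadd.i64 on d registers.  */
}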
diff --git a/ChangeLog.linaro b/ChangeLog.linaro
index 748eeb230e5..b6d6fd9821f 100644
--- a/ChangeLog.linaro
+++ b/ChangeLog.linaro
@@ -1,3 +1,37 @@
+2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
+
+ Backport from mainline r196876:
+ 2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
+
+ gcc/
+ * config/arm/arm-protos.h (tune_params): Add
+ prefer_neon_for_64bits field.
+ * config/arm/arm.c (prefer_neon_for_64bits): New variable.
+ (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
+ (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
+ (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
+ (arm_cortex_a5_tune, arm_cortex_a15_tune): Ditto.
+ (arm_cortex_a9_tune, arm_fa726te_tune): Ditto.
+ (arm_option_override): Handle -mneon-for-64bits new option.
+ * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
+ (prefer_neon_for_64bits): Declare new variable.
+ * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
+ avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
+ nota8.
+ (arch_enabled): Handle new arch types. Remove support for onlya8
+ and nota8.
+ (one_cmpldi2): Use new arch names.
+ * config/arm/arm.opt (mneon-for-64bits): Add option.
+ * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
+ (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
+ neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
+ of onlya8.
+ * doc/invoke.texi (-mneon-for-64bits): Document.
+
+ gcc/testsuite/
+ * gcc.target/arm/neon-for-64bits-1.c: New tests.
+ * gcc.target/arm/neon-for-64bits-2.c: Likewise.
+
2013-03-11 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>
gcc/
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 9ad1540195d..297f876aa79 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -241,6 +241,8 @@ struct tune_params
int l1_cache_line_size;
bool prefer_constant_pool;
int (*branch_cost) (bool, bool);
+ /* Prefer Neon for 64-bit bitops. */
+ bool prefer_neon_for_64bits;
};
extern const struct tune_params *current_tune;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7788e727fcb..2e75610f0e7 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -806,6 +806,10 @@ int arm_arch_thumb2;
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
+/* Nonzero if we should use Neon to handle 64-bits operations rather
+ than core registers. */
+int prefer_neon_for_64bits = 0;
+
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
@@ -881,7 +885,9 @@ const struct tune_params arm_slowmul_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_fastmul_tune =
@@ -892,7 +898,9 @@ const struct tune_params arm_fastmul_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -906,7 +914,9 @@ const struct tune_params arm_strongarm_tune =
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_xscale_tune =
@@ -917,7 +927,9 @@ const struct tune_params arm_xscale_tune =
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_9e_tune =
@@ -928,7 +940,9 @@ const struct tune_params arm_9e_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_v6t2_tune =
@@ -939,7 +953,9 @@ const struct tune_params arm_v6t2_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -951,7 +967,9 @@ const struct tune_params arm_cortex_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -965,7 +983,9 @@ const struct tune_params arm_cortex_a5_tune =
1, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_cortex_a5_branch_cost
+ arm_cortex_a5_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -976,7 +996,9 @@ const struct tune_params arm_cortex_a9_tune =
5, /* Max cond insns. */
ARM_PREFETCH_BENEFICIAL(4,32,32),
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
const struct tune_params arm_fa726te_tune =
@@ -987,7 +1009,9 @@ const struct tune_params arm_fa726te_tune =
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ false /* Prefer Neon for
+ 64-bits bitops. */
};
@@ -2038,6 +2062,12 @@ arm_option_override (void)
global_options.x_param_values,
global_options_set.x_param_values);
+ /* Use Neon to perform 64-bits operations rather than core
+ registers. */
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
+ if (use_neon_for_64bits == 1)
+ prefer_neon_for_64bits = true;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 1e2f542e806..eabc6f8e03b 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -296,6 +296,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
+/* Should NEON be used for 64-bits bitops. */
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
+
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
then TARGET_AAPCS_BASED must be true -- but the converse does not
hold. TARGET_BPABI implies the use of the BPABI runtime library,
@@ -447,6 +450,10 @@ extern int arm_arch_arm_hwdiv;
/* Nonzero if chip supports integer division instruction in Thumb mode. */
extern int arm_arch_thumb_hwdiv;
+/* Nonzero if we should use Neon to handle 64-bits operations rather
+ than core registers. */
+extern int prefer_neon_for_64bits;
+
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index bd3ba19b44e..85f9609dfd3 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -196,7 +196,7 @@
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
; arm_arch6. This attribute is used to compute attribute "enabled",
; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -231,22 +231,14 @@
(match_test "TARGET_32BIT && !arm_arch6"))
(const_string "yes")
- (and (eq_attr "arch" "onlya8")
- (eq_attr "tune" "cortexa8"))
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
+ (match_test "TARGET_NEON")
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
(const_string "yes")
- (and (eq_attr "arch" "neon_onlya8")
- (eq_attr "tune" "cortexa8")
- (match_test "TARGET_NEON"))
- (const_string "yes")
-
- (and (eq_attr "arch" "nota8")
- (not (eq_attr "tune" "cortexa8")))
- (const_string "yes")
-
- (and (eq_attr "arch" "neon_nota8")
- (not (eq_attr "tune" "cortexa8"))
- (match_test "TARGET_NEON"))
+ (and (eq_attr "arch" "neon_for_64bits")
+ (match_test "TARGET_NEON")
+ (match_test "TARGET_PREFER_NEON_64BITS"))
(const_string "yes")]
(const_string "no")))
@@ -4439,7 +4431,7 @@
[(set_attr "length" "*,8,8,*")
(set_attr "predicable" "no,yes,yes,no")
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
)
(define_expand "one_cmplsi2"
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 934aa35775e..bcd1ef0a668 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -267,3 +267,7 @@ that may trigger Cortex-M3 errata.
munaligned-access
Target Report Var(unaligned_access) Init(2)
Enable unaligned word and halfword accesses to packed data.
+
+mneon-for-64bits
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
+Use Neon to perform 64-bits operations rather than core registers.
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index c3ee904b49c..12d5f2dc00a 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -618,7 +618,7 @@
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
(set_attr "length" "*,8,8,*,8,8,8")
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
(define_insn "*sub<mode>3_neon"
@@ -655,7 +655,7 @@
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
(set_attr "conds" "*,clob,clob,clob,*")
(set_attr "length" "*,8,8,8,*")
- (set_attr "arch" "nota8,*,*,*,onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
(define_insn "*mul<mode>3_neon"
@@ -817,7 +817,7 @@
}
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
(set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
;; The concrete forms of the Neon immediate-logic instructions are vbic and
@@ -862,7 +862,7 @@
}
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
(set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
(define_insn "orn<mode>3_neon"
@@ -958,7 +958,7 @@
veor\t%P0, %P1, %P2"
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
(set_attr "length" "*,8,8,*")
- (set_attr "arch" "nota8,*,*,onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
)
(define_insn "one_cmpl<mode>2"
@@ -1280,7 +1280,7 @@
}
DONE;
}"
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")]
)
@@ -1381,7 +1381,7 @@
DONE;
}"
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")]
)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 90e9229a1a1..f7ca1b2f0e5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -505,7 +505,8 @@ Objective-C and Objective-C++ Dialects}.
-mtp=@var{name} -mtls-dialect=@var{dialect} @gol
-mword-relocations @gol
-mfix-cortex-m3-ldrd @gol
--munaligned-access}
+-munaligned-access @gol
+-mneon-for-64bits}
@emph{AVR Options}
@gccoptlist{-mmcu=@var{mcu} -maccumulate-args -mbranch-cost=@var{cost} @gol
@@ -11103,6 +11104,11 @@ setting of this option. If unaligned access is enabled then the
preprocessor symbol @code{__ARM_FEATURE_UNALIGNED} will also be
defined.
+@item -mneon-for-64bits
+@opindex mneon-for-64bits
+Enables using Neon to handle scalar 64-bits operations. This is
+disabled by default since the cost of moving data from core registers
+to Neon is high.
@end table
@node AVR Options
diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
new file mode 100644
index 00000000000..a2a4103b9a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
@@ -0,0 +1,54 @@
+/* Check that Neon is *not* used by default to handle 64-bits scalar
+ operations. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+typedef long long i64;
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef int i32;
+
+/* Unary operators */
+#define UNARY_OP(name, op) \
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
+
+/* Binary operators */
+#define BINARY_OP(name, op) \
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
+
+/* Unsigned shift */
+#define SHIFT_U(name, op, amount) \
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
+
+/* Signed shift */
+#define SHIFT_S(name, op, amount) \
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
+
+UNARY_OP(not, ~)
+
+BINARY_OP(add, +)
+BINARY_OP(sub, -)
+BINARY_OP(and, &)
+BINARY_OP(or, |)
+BINARY_OP(xor, ^)
+
+SHIFT_U(right1, >>, 1)
+SHIFT_U(right2, >>, 2)
+SHIFT_U(right5, >>, 5)
+SHIFT_U(rightn, >>, c)
+
+SHIFT_S(right1, >>, 1)
+SHIFT_S(right2, >>, 2)
+SHIFT_S(right5, >>, 5)
+SHIFT_S(rightn, >>, c)
+
+/* { dg-final {scan-assembler-times "vmvn" 0} } */
+/* { dg-final {scan-assembler-times "vadd" 0} } */
+/* { dg-final {scan-assembler-times "vsub" 0} } */
+/* { dg-final {scan-assembler-times "vand" 0} } */
+/* { dg-final {scan-assembler-times "vorr" 0} } */
+/* { dg-final {scan-assembler-times "veor" 0} } */
+/* { dg-final {scan-assembler-times "vshr" 0} } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
new file mode 100644
index 00000000000..035bfb77a37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
@@ -0,0 +1,57 @@
+/* Check that Neon is used to handle 64-bits scalar operations. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mneon-for-64bits" } */
+/* { dg-add-options arm_neon } */
+
+typedef long long i64;
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef int i32;
+
+/* Unary operators */
+#define UNARY_OP(name, op) \
+ void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
+
+/* Binary operators */
+#define BINARY_OP(name, op) \
+ void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
+
+/* Unsigned shift */
+#define SHIFT_U(name, op, amount) \
+ void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
+
+/* Signed shift */
+#define SHIFT_S(name, op, amount) \
+ void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
+
+UNARY_OP(not, ~)
+
+BINARY_OP(add, +)
+BINARY_OP(sub, -)
+BINARY_OP(and, &)
+BINARY_OP(or, |)
+BINARY_OP(xor, ^)
+
+SHIFT_U(right1, >>, 1)
+SHIFT_U(right2, >>, 2)
+SHIFT_U(right5, >>, 5)
+SHIFT_U(rightn, >>, c)
+
+SHIFT_S(right1, >>, 1)
+SHIFT_S(right2, >>, 2)
+SHIFT_S(right5, >>, 5)
+SHIFT_S(rightn, >>, c)
+
+/* { dg-final {scan-assembler-times "vmvn" 1} } */
+/* Two vadd: 1 in unary_not, 1 in binary_add */
+/* { dg-final {scan-assembler-times "vadd" 2} } */
+/* { dg-final {scan-assembler-times "vsub" 1} } */
+/* { dg-final {scan-assembler-times "vand" 1} } */
+/* { dg-final {scan-assembler-times "vorr" 1} } */
+/* { dg-final {scan-assembler-times "veor" 1} } */
+/* 6 vshr for right shifts by constant, and variable right shift uses
+ vshl with a negative amount in register. */
+/* { dg-final {scan-assembler-times "vshr" 6} } */
+/* { dg-final {scan-assembler-times "vshl" 2} } */
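The asymmetry between the vshr and vshl counts above follows from how Neon shifts work: right shifts by an immediate map to vshr, while a right shift by a variable amount is expressed as vshl with the negated amount in a register. A minimal sketch (not part of the testsuite) of the two cases, assuming -O2 -mneon-for-64bits and a Neon-capable FPU:

typedef unsigned long long u64;

/* Constant right shift: expected to become vshr.u64 with an immediate.
   Variable right shift: the amount is expected to be negated in a core
   register and the shift done with vshl.u64 (shift by register, signed
   amount), as noted in the comment at the end of neon-for-64bits-2.c.  */
u64 shr_const (u64 x)        { return x >> 5; }
u64 shr_var   (u64 x, int n) { return x >> n; }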