diff options
author | Christophe Lyon <christophe.lyon@linaro.org> | 2013-04-02 16:33:00 +0200 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@linaro.org> | 2013-04-02 16:33:00 +0200 |
commit | 6238849811847b080577091a4a6b6c6658429716 (patch) | |
tree | 94e647b56a07e68c11fca963b0917f26b401f22b | |
parent | 3cf22210d185be694cb06509170ac694fbc40834 (diff) | |
parent | 00254af064643964e4c784a2ee2c19ea7f14e61a (diff) |
Backport "Turn off 64bits ops in Neon" from mainline r196876.
-rw-r--r-- | ChangeLog.linaro | 34 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 50 | ||||
-rw-r--r-- | gcc/config/arm/arm.h | 7 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 24 | ||||
-rw-r--r-- | gcc/config/arm/arm.opt | 4 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 14 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c | 54 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c | 57 |
10 files changed, 220 insertions, 34 deletions
diff --git a/ChangeLog.linaro b/ChangeLog.linaro index 748eeb230e5..b6d6fd9821f 100644 --- a/ChangeLog.linaro +++ b/ChangeLog.linaro @@ -1,3 +1,37 @@ +2013-02-12 Christophe Lyon <christophe.lyon@linaro.org> + + Backport from mainline r196876: + 2013-02-12 Christophe Lyon <christophe.lyon@linaro.org> + + gcc/ + * config/arm/arm-protos.h (tune_params): Add + prefer_neon_for_64bits field. + * config/arm/arm.c (prefer_neon_for_64bits): New variable. + (arm_slowmul_tune): Default prefer_neon_for_64bits to false. + (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto. + (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto. + (arm_cortex_a5_tune, arm_cortex_a15_tune): Ditto. + (arm_cortex_a9_tune, arm_fa726te_tune): Ditto. + (arm_option_override): Handle -mneon-for-64bits new option. + * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro. + (prefer_neon_for_64bits): Declare new variable. + * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to + avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and + nota8. + (arch_enabled): Handle new arch types. Remove support for onlya8 + and nota8. + (one_cmpldi2): Use new arch names. + * config/arm/arm.opt (mneon-for-64bits): Add option. + * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon) + (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use + neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead + of onlya8. + * doc/invoke.texi (-mneon-for-64bits): Document. + + gcc/testsuite/ + * gcc.target/arm/neon-for-64bits-1.c: New tests. + * gcc.target/arm/neon-for-64bits-2.c: Likewise. + 2013-03-11 Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> gcc/ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 9ad1540195d..297f876aa79 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -241,6 +241,8 @@ struct tune_params int l1_cache_line_size; bool prefer_constant_pool; int (*branch_cost) (bool, bool); + /* Prefer Neon for 64-bit bitops. */ + bool prefer_neon_for_64bits; }; extern const struct tune_params *current_tune; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 7788e727fcb..2e75610f0e7 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -806,6 +806,10 @@ int arm_arch_thumb2; int arm_arch_arm_hwdiv; int arm_arch_thumb_hwdiv; +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +int prefer_neon_for_64bits = 0; + /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we must report the mode of the memory reference from TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ @@ -881,7 +885,9 @@ const struct tune_params arm_slowmul_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_fastmul_tune = @@ -892,7 +898,9 @@ const struct tune_params arm_fastmul_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -906,7 +914,9 @@ const struct tune_params arm_strongarm_tune = 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_xscale_tune = @@ -917,7 +927,9 @@ const struct tune_params arm_xscale_tune = 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_9e_tune = @@ -928,7 +940,9 @@ const struct tune_params arm_9e_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_v6t2_tune = @@ -939,7 +953,9 @@ const struct tune_params arm_v6t2_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -951,7 +967,9 @@ const struct tune_params arm_cortex_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -965,7 +983,9 @@ const struct tune_params arm_cortex_a5_tune = 1, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ - arm_cortex_a5_branch_cost + arm_cortex_a5_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_cortex_a9_tune = @@ -976,7 +996,9 @@ const struct tune_params arm_cortex_a9_tune = 5, /* Max cond insns. */ ARM_PREFETCH_BENEFICIAL(4,32,32), false, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; const struct tune_params arm_fa726te_tune = @@ -987,7 +1009,9 @@ const struct tune_params arm_fa726te_tune = 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ - arm_default_branch_cost + arm_default_branch_cost, + false /* Prefer Neon for + 64-bits bitops. */ }; @@ -2038,6 +2062,12 @@ arm_option_override (void) global_options.x_param_values, global_options_set.x_param_values); + /* Use Neon to perform 64-bits operations rather than core + registers. */ + prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits; + if (use_neon_for_64bits == 1) + prefer_neon_for_64bits = true; + /* Register global variables with the garbage collector. */ arm_add_gc_roots (); } diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 1e2f542e806..eabc6f8e03b 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -296,6 +296,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) +/* Should NEON be used for 64-bits bitops. */ +#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + /* True iff the full BPABI is being used. If TARGET_BPABI is true, then TARGET_AAPCS_BASED must be true -- but the converse does not hold. TARGET_BPABI implies the use of the BPABI runtime library, @@ -447,6 +450,10 @@ extern int arm_arch_arm_hwdiv; /* Nonzero if chip supports integer division instruction in Thumb mode. */ extern int arm_arch_thumb_hwdiv; +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +extern int prefer_neon_for_64bits; + #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) #endif diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index bd3ba19b44e..85f9609dfd3 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -196,7 +196,7 @@ ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without ; arm_arch6. This attribute is used to compute attribute "enabled", ; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8" +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -231,22 +231,14 @@ (match_test "TARGET_32BIT && !arm_arch6")) (const_string "yes") - (and (eq_attr "arch" "onlya8") - (eq_attr "tune" "cortexa8")) + (and (eq_attr "arch" "avoid_neon_for_64bits") + (match_test "TARGET_NEON") + (not (match_test "TARGET_PREFER_NEON_64BITS"))) (const_string "yes") - (and (eq_attr "arch" "neon_onlya8") - (eq_attr "tune" "cortexa8") - (match_test "TARGET_NEON")) - (const_string "yes") - - (and (eq_attr "arch" "nota8") - (not (eq_attr "tune" "cortexa8"))) - (const_string "yes") - - (and (eq_attr "arch" "neon_nota8") - (not (eq_attr "tune" "cortexa8")) - (match_test "TARGET_NEON")) + (and (eq_attr "arch" "neon_for_64bits") + (match_test "TARGET_NEON") + (match_test "TARGET_PREFER_NEON_64BITS")) (const_string "yes")] (const_string "no"))) @@ -4439,7 +4431,7 @@ [(set_attr "length" "*,8,8,*") (set_attr "predicable" "no,yes,yes,no") (set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "arch" "neon_nota8,*,*,neon_onlya8")] + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] ) (define_expand "one_cmplsi2" diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 934aa35775e..bcd1ef0a668 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -267,3 +267,7 @@ that may trigger Cortex-M3 errata. munaligned-access Target Report Var(unaligned_access) Init(2) Enable unaligned word and halfword accesses to packed data. + +mneon-for-64bits +Target Report RejectNegative Var(use_neon_for_64bits) Init(0) +Use Neon to perform 64-bits operations rather than core registers. diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index c3ee904b49c..12d5f2dc00a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -618,7 +618,7 @@ [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*") (set_attr "conds" "*,clob,clob,*,clob,clob,clob") (set_attr "length" "*,8,8,*,8,8,8") - (set_attr "arch" "nota8,*,*,onlya8,*,*,*")] + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] ) (define_insn "*sub<mode>3_neon" @@ -655,7 +655,7 @@ [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2") (set_attr "conds" "*,clob,clob,clob,*") (set_attr "length" "*,8,8,8,*") - (set_attr "arch" "nota8,*,*,*,onlya8")] + (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*mul<mode>3_neon" @@ -817,7 +817,7 @@ } [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -862,7 +862,7 @@ } [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) (define_insn "orn<mode>3_neon" @@ -958,7 +958,7 @@ veor\t%P0, %P1, %P2" [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") (set_attr "length" "*,8,8,*") - (set_attr "arch" "nota8,*,*,onlya8")] + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] ) (define_insn "one_cmpl<mode>2" @@ -1280,7 +1280,7 @@ } DONE; }" - [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "opt" "*,*,speed,speed,*,*")] ) @@ -1381,7 +1381,7 @@ DONE; }" - [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "opt" "*,*,speed,speed,*,*")] ) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 90e9229a1a1..f7ca1b2f0e5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -505,7 +505,8 @@ Objective-C and Objective-C++ Dialects}. -mtp=@var{name} -mtls-dialect=@var{dialect} @gol -mword-relocations @gol -mfix-cortex-m3-ldrd @gol --munaligned-access} +-munaligned-access @gol +-mneon-for-64bits} @emph{AVR Options} @gccoptlist{-mmcu=@var{mcu} -maccumulate-args -mbranch-cost=@var{cost} @gol @@ -11103,6 +11104,11 @@ setting of this option. If unaligned access is enabled then the preprocessor symbol @code{__ARM_FEATURE_UNALIGNED} will also be defined. +@item -mneon-for-64bits +@opindex mneon-for-64bits +Enables using Neon to handle scalar 64-bits operations. This is +disabled by default since the cost of moving data from core registers +to Neon is high. @end table @node AVR Options diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c new file mode 100644 index 00000000000..a2a4103b9a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c @@ -0,0 +1,54 @@ +/* Check that Neon is *not* used by default to handle 64-bits scalar + operations. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +typedef long long i64; +typedef unsigned long long u64; +typedef unsigned int u32; +typedef int i32; + +/* Unary operators */ +#define UNARY_OP(name, op) \ + void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; } + +/* Binary operators */ +#define BINARY_OP(name, op) \ + void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; } + +/* Unsigned shift */ +#define SHIFT_U(name, op, amount) \ + void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; } + +/* Signed shift */ +#define SHIFT_S(name, op, amount) \ + void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; } + +UNARY_OP(not, ~) + +BINARY_OP(add, +) +BINARY_OP(sub, -) +BINARY_OP(and, &) +BINARY_OP(or, |) +BINARY_OP(xor, ^) + +SHIFT_U(right1, >>, 1) +SHIFT_U(right2, >>, 2) +SHIFT_U(right5, >>, 5) +SHIFT_U(rightn, >>, c) + +SHIFT_S(right1, >>, 1) +SHIFT_S(right2, >>, 2) +SHIFT_S(right5, >>, 5) +SHIFT_S(rightn, >>, c) + +/* { dg-final {scan-assembler-times "vmvn" 0} } */ +/* { dg-final {scan-assembler-times "vadd" 0} } */ +/* { dg-final {scan-assembler-times "vsub" 0} } */ +/* { dg-final {scan-assembler-times "vand" 0} } */ +/* { dg-final {scan-assembler-times "vorr" 0} } */ +/* { dg-final {scan-assembler-times "veor" 0} } */ +/* { dg-final {scan-assembler-times "vshr" 0} } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c b/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c new file mode 100644 index 00000000000..035bfb77a37 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c @@ -0,0 +1,57 @@ +/* Check that Neon is used to handle 64-bits scalar operations. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2 -mneon-for-64bits" } */ +/* { dg-add-options arm_neon } */ + +typedef long long i64; +typedef unsigned long long u64; +typedef unsigned int u32; +typedef int i32; + +/* Unary operators */ +#define UNARY_OP(name, op) \ + void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; } + +/* Binary operators */ +#define BINARY_OP(name, op) \ + void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; } + +/* Unsigned shift */ +#define SHIFT_U(name, op, amount) \ + void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; } + +/* Signed shift */ +#define SHIFT_S(name, op, amount) \ + void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; } + +UNARY_OP(not, ~) + +BINARY_OP(add, +) +BINARY_OP(sub, -) +BINARY_OP(and, &) +BINARY_OP(or, |) +BINARY_OP(xor, ^) + +SHIFT_U(right1, >>, 1) +SHIFT_U(right2, >>, 2) +SHIFT_U(right5, >>, 5) +SHIFT_U(rightn, >>, c) + +SHIFT_S(right1, >>, 1) +SHIFT_S(right2, >>, 2) +SHIFT_S(right5, >>, 5) +SHIFT_S(rightn, >>, c) + +/* { dg-final {scan-assembler-times "vmvn" 1} } */ +/* Two vadd: 1 in unary_not, 1 in binary_add */ +/* { dg-final {scan-assembler-times "vadd" 2} } */ +/* { dg-final {scan-assembler-times "vsub" 1} } */ +/* { dg-final {scan-assembler-times "vand" 1} } */ +/* { dg-final {scan-assembler-times "vorr" 1} } */ +/* { dg-final {scan-assembler-times "veor" 1} } */ +/* 6 vshr for right shifts by constant, and variable right shift uses + vshl with a negative amount in register. */ +/* { dg-final {scan-assembler-times "vshr" 6} } */ +/* { dg-final {scan-assembler-times "vshl" 2} } */ |