diff options
author | Christophe Lyon <christophe.lyon@linaro.org> | 2013-04-03 15:41:22 +0200 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@linaro.org> | 2013-04-03 15:41:22 +0200 |
commit | 9eca2b532ac8521a21b41de48b6a7544ca3b2fe5 (patch) | |
tree | c74c36b6e07abb3bae7e26a1f0b90a72ced1cef5 | |
parent | de9b5dfe5c0c30badb223e7a63fdc888d0fc0290 (diff) | |
parent | c6c3148be9edfa669239d1f308198f64f75571e6 (diff) |
Partial backport Vectorizer Cost Model from mainline r195977
(partial because some generic hooks do not exist in the 4.7 branch)
-rw-r--r-- | ChangeLog.linaro | 15 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 23 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 113 |
3 files changed, 141 insertions, 10 deletions
diff --git a/ChangeLog.linaro b/ChangeLog.linaro index 3faf18eac0a..06bb6f8a9e2 100644 --- a/ChangeLog.linaro +++ b/ChangeLog.linaro @@ -1,3 +1,18 @@ +2013-04-03 Christophe Lyon <christophe.lyon@linaro.org> + + Partial backport from mainline r195977: + 2013-02-12 Christophe Lyon <christophe.lyon@linaro.org> + + * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type. + (struct tune_params): Add vec_costs field. + * config/arm/arm.c (arm_builtin_vectorization_cost): New function. + (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define. + (arm_default_vec_cost): New struct of type cpu_vec_costs. + (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune) + (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune) + (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune) + (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field. + 2013-04-02 Christophe Lyon <christophe.lyon@linaro.org> Backport from mainline r196876: diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 297f876aa79..039ecfcb906 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -225,6 +225,27 @@ extern const char *arm_mangle_type (const_tree); extern void arm_order_regs_for_local_alloc (void); +/* Vectorizer cost model implementation. */ +struct cpu_vec_costs { + const int scalar_stmt_cost; /* Cost of any scalar operation, excluding + load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, excluding + load, store, vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. 
*/ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer + cost model. */ + const int cond_not_taken_branch_cost;/* Cost of not taken branch for + vectorizer cost model. */ +}; + #ifdef RTX_CODE /* This needs to be here because we need RTX_CODE and similar. */ @@ -243,6 +264,8 @@ struct tune_params int (*branch_cost) (bool, bool); /* Prefer Neon for 64-bit bitops. */ bool prefer_neon_for_64bits; + /* Vectorizer costs. */ + const struct cpu_vec_costs* vec_costs; }; extern const struct tune_params *current_tune; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 2e75610f0e7..32e0dc97bd5 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -274,6 +274,11 @@ static int arm_cortex_a5_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); + +static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED); + /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -627,6 +632,10 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ arm_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + arm_builtin_vectorization_cost + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -877,6 +886,23 @@ struct processors l1_size, \ l1_line_size +/* arm generic vectorizer costs. */ +static const +struct cpu_vec_costs arm_default_vec_cost = { + 1, /* scalar_stmt_cost. */ + 1, /* scalar_load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. 
*/ + 1, /* vec_unalign_load_cost. */ + 1, /* vec_unalign_store_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, @@ -886,8 +912,9 @@ const struct tune_params arm_slowmul_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_fastmul_tune = @@ -899,8 +926,9 @@ const struct tune_params arm_fastmul_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -915,8 +943,9 @@ const struct tune_params arm_strongarm_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_xscale_tune = @@ -928,8 +957,9 @@ const struct tune_params arm_xscale_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_9e_tune = @@ -941,8 +971,9 @@ const struct tune_params arm_9e_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_v6t2_tune = @@ -954,8 +985,9 @@ const struct tune_params arm_v6t2_tune = ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. 
*/ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -968,8 +1000,9 @@ const struct tune_params arm_cortex_tune = ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -984,8 +1017,9 @@ const struct tune_params arm_cortex_a5_tune = ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_cortex_a5_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_cortex_a9_tune = @@ -997,8 +1031,9 @@ const struct tune_params arm_cortex_a9_tune = ARM_PREFETCH_BENEFICIAL(4,32,32), false, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; const struct tune_params arm_fa726te_tune = @@ -1010,8 +1045,9 @@ const struct tune_params arm_fa726te_tune = ARM_PREFETCH_NOT_BENEFICIAL, true, /* Prefer constant pool. */ arm_default_branch_cost, - false /* Prefer Neon for + false, /* Prefer Neon for 64-bits bitops. */ + &arm_default_vec_cost, /* Vectorizer costs. */ }; @@ -8704,6 +8740,63 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, } +/* Vectorizer cost model implementation. */ + +/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ +static int +arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return current_tune->vec_costs->scalar_stmt_cost; + + case scalar_load: + return current_tune->vec_costs->scalar_load_cost; + + case scalar_store: + return current_tune->vec_costs->scalar_store_cost; + + case vector_stmt: + return current_tune->vec_costs->vec_stmt_cost; + + case vector_load: + return current_tune->vec_costs->vec_align_load_cost; + + case vector_store: + return current_tune->vec_costs->vec_store_cost; + + case vec_to_scalar: + return current_tune->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return current_tune->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return current_tune->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return current_tune->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return current_tune->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return current_tune->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return current_tune->vec_costs->vec_stmt_cost; + + default: + gcc_unreachable (); + } +} + /* Return true if and only if this insn can dual-issue only as older. */ static bool cortexa7_older_only (rtx insn) |