aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christophe Lyon <christophe.lyon@linaro.org> 2013-04-03 15:41:22 +0200
committer Christophe Lyon <christophe.lyon@linaro.org> 2013-04-03 15:41:22 +0200
commit 9eca2b532ac8521a21b41de48b6a7544ca3b2fe5 (patch)
tree c74c36b6e07abb3bae7e26a1f0b90a72ced1cef5
parent de9b5dfe5c0c30badb223e7a63fdc888d0fc0290 (diff)
parent c6c3148be9edfa669239d1f308198f64f75571e6 (diff)
Partial backport Vectorizer Cost Model from mainline r195977
(partial because some generic hooks do not exist in the 4.7 branch)
-rw-r--r-- ChangeLog.linaro 15
-rw-r--r-- gcc/config/arm/arm-protos.h 23
-rw-r--r-- gcc/config/arm/arm.c 113
3 files changed, 141 insertions, 10 deletions
diff --git a/ChangeLog.linaro b/ChangeLog.linaro
index 3faf18eac0a..06bb6f8a9e2 100644
--- a/ChangeLog.linaro
+++ b/ChangeLog.linaro
@@ -1,3 +1,18 @@
+2013-04-03 Christophe Lyon <christophe.lyon@linaro.org>
+
+ Partial backport from mainline r195977:
+ 2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
+
+ * config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
+ (struct tune_params): Add vec_costs field.
+ * config/arm/arm.c (arm_builtin_vectorization_cost): New function.
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define.
+ (arm_default_vec_cost): New struct of type cpu_vec_costs.
+ (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
+ (arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
+ (arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
+ (arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.
+
2013-04-02 Christophe Lyon <christophe.lyon@linaro.org>
Backport from mainline r196876:
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 297f876aa79..039ecfcb906 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -225,6 +225,27 @@ extern const char *arm_mangle_type (const_tree);
extern void arm_order_regs_for_local_alloc (void);
+/* Vectorizer cost model implementation. */
+struct cpu_vec_costs {
+ const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
+ load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation, excluding
+ load, store, vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vector-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
+ cost model. */
+ const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+ vectorizer cost model. */
+};
+
#ifdef RTX_CODE
/* This needs to be here because we need RTX_CODE and similar. */
@@ -243,6 +264,8 @@ struct tune_params
int (*branch_cost) (bool, bool);
/* Prefer Neon for 64-bit bitops. */
bool prefer_neon_for_64bits;
+ /* Vectorizer costs. */
+ const struct cpu_vec_costs* vec_costs;
};
extern const struct tune_params *current_tune;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2e75610f0e7..32e0dc97bd5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -274,6 +274,11 @@ static int arm_cortex_a5_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
+
+static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED);
+
/* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
@@ -627,6 +632,10 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
arm_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ arm_builtin_vectorization_cost
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -877,6 +886,23 @@ struct processors
l1_size, \
l1_line_size
+/* ARM generic vectorizer costs. */
+static const
+struct cpu_vec_costs arm_default_vec_cost = {
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_unalign_store_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
@@ -886,8 +912,9 @@ const struct tune_params arm_slowmul_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fastmul_tune =
@@ -899,8 +926,9 @@ const struct tune_params arm_fastmul_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -915,8 +943,9 @@ const struct tune_params arm_strongarm_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_xscale_tune =
@@ -928,8 +957,9 @@ const struct tune_params arm_xscale_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_9e_tune =
@@ -941,8 +971,9 @@ const struct tune_params arm_9e_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_v6t2_tune =
@@ -954,8 +985,9 @@ const struct tune_params arm_v6t2_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -968,8 +1000,9 @@ const struct tune_params arm_cortex_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -984,8 +1017,9 @@ const struct tune_params arm_cortex_a5_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_cortex_a5_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -997,8 +1031,9 @@ const struct tune_params arm_cortex_a9_tune =
ARM_PREFETCH_BENEFICIAL(4,32,32),
false, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fa726te_tune =
@@ -1010,8 +1045,9 @@ const struct tune_params arm_fa726te_tune =
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
arm_default_branch_cost,
- false /* Prefer Neon for
+ false, /* Prefer Neon for
64-bits bitops. */
+ &arm_default_vec_cost, /* Vectorizer costs. */
};
@@ -8704,6 +8740,63 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
+/* Vectorizer cost model implementation. */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost using current_tune->vec_costs. */
+static int
+arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, /* NOTE(review): not referenced in the body. */
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements; /* NOTE(review): declared but never referenced below. */
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return current_tune->vec_costs->scalar_stmt_cost;
+
+ case scalar_load:
+ return current_tune->vec_costs->scalar_load_cost;
+
+ case scalar_store:
+ return current_tune->vec_costs->scalar_store_cost;
+
+ case vector_stmt:
+ return current_tune->vec_costs->vec_stmt_cost;
+
+ case vector_load:
+ return current_tune->vec_costs->vec_align_load_cost;
+
+ case vector_store:
+ return current_tune->vec_costs->vec_store_cost;
+
+ case vec_to_scalar:
+ return current_tune->vec_costs->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return current_tune->vec_costs->scalar_to_vec_cost;
+
+ case unaligned_load:
+ return current_tune->vec_costs->vec_unalign_load_cost;
+
+ case unaligned_store:
+ return current_tune->vec_costs->vec_unalign_store_cost;
+
+ case cond_branch_taken:
+ return current_tune->vec_costs->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return current_tune->vec_costs->cond_not_taken_branch_cost;
+
+ case vec_perm:
+ case vec_promote_demote: /* Both are charged as a plain vector statement. */
+ return current_tune->vec_costs->vec_stmt_cost;
+
+ default: /* Any other cost kind is treated as unreachable. */
+ gcc_unreachable ();
+ }
+}
+
/* Return true if and only if this insn can dual-issue only as older. */
static bool
cortexa7_older_only (rtx insn)