aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4> 2011-09-21 08:49:13 +0000
committer jye2 <jye2@138bc75d-0d04-0410-961f-82ee72b054a4> 2011-09-21 08:49:13 +0000
commit ed33297afdb5c5277fc4fffd2ffcae01a15945db (patch)
tree d2cda8051ecdc853a9518eefe4990d8099254c83
parent 5834927983d4572f7552da5a35e384e24a0f6bf6 (diff)
2011-09-21 Jiangning Liu <jiangning.liu@arm.com>
Tune loop unrolling for cortex-m * config/arm/arm-cores.def (cortex-m0): Change to new tune cortex_v6m. (cortex-m1): Likewise. * config/arm/arm-protos.h (max_unroll_times): New. * config/arm/arm.c (arm_default_unroll_times): New. (arm_cortex_m_unroll_times): New. (arm_cortex_v6m_tune): New. (arm_slowmul_tune): Add max_unroll_times function pointer. (arm_fastmul_tune, arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune, arm_cortex_v7m_tune, arm_cortex_v6m_tune, arm_fa726te_tune): Likewise. (arm_option_override): Enable loop unroll for all M class Cores, if optimization level is >= 1. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ARM/embedded-4_6-branch@179039 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog.arm 18
-rw-r--r-- gcc/config/arm/arm-cores.def 4
-rw-r--r-- gcc/config/arm/arm-protos.h 1
-rw-r--r-- gcc/config/arm/arm.c 69
4 files changed, 81 insertions, 11 deletions
diff --git a/gcc/ChangeLog.arm b/gcc/ChangeLog.arm
index e6d59e71513..82f9417f431 100644
--- a/gcc/ChangeLog.arm
+++ b/gcc/ChangeLog.arm
@@ -1,3 +1,21 @@
+2011-09-21 Jiangning Liu <jiangning.liu@arm.com>
+
+ Tune loop unrolling for cortex-m
+ * config/arm/arm-cores.def (cortex-m0): Change to new tune
+ cortex_v6m.
+ (cortex-m1): Likewise.
+ * config/arm/arm-protos.h (max_unroll_times): New.
+ * config/arm/arm.c (arm_default_unroll_times): New.
+ (arm_cortex_m_unroll_times): New.
+ (arm_cortex_v6m_tune): New.
+ (arm_slowmul_tune): Add max_unroll_times function pointer.
+ (arm_fastmul_tune, arm_xscale_tune, arm_9e_tune,
+ arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune,
+ arm_cortex_v7m_tune, arm_cortex_v6m_tune,
+ arm_fa726te_tune): Likewise.
+ (arm_option_override): Enable loop unroll for all M class
+ Cores, if optimization level is >= 1.
+
2011-09-20 Jiangning Liu <jiangning.liu@arm.com>
Tune branch cost for armv7-m
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index f24cf9a7bc2..2a51ab87c64 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -133,5 +133,5 @@ ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex)
ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex_v7m)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex_v7m)
-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex_v6m)
+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex_v6m)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index e5ec1a1bfa1..80b8960bf9c 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -226,6 +226,7 @@ struct tune_params
int l1_cache_line_size;
bool prefer_constant_pool;
int (*branch_cost) (bool, bool);
+ int (*max_unroll_times) (void);
};
extern const struct tune_params *current_tune;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0fc3d67cea7..1020ddc80fe 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -250,6 +250,8 @@ static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static int arm_default_unroll_times(void);
+static int arm_cortex_m_unroll_times(void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_v7m_branch_cost (bool, bool);
@@ -858,7 +860,8 @@ const struct tune_params arm_slowmul_tune =
3, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
const struct tune_params arm_fastmul_tune =
@@ -868,7 +871,8 @@ const struct tune_params arm_fastmul_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
const struct tune_params arm_xscale_tune =
@@ -878,7 +882,8 @@ const struct tune_params arm_xscale_tune =
2, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
const struct tune_params arm_9e_tune =
@@ -888,7 +893,8 @@ const struct tune_params arm_9e_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
const struct tune_params arm_v6t2_tune =
@@ -898,7 +904,8 @@ const struct tune_params arm_v6t2_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -909,7 +916,8 @@ const struct tune_params arm_cortex_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
const struct tune_params arm_cortex_a9_tune =
@@ -919,7 +927,8 @@ const struct tune_params arm_cortex_a9_tune =
1, /* Constant limit. */
ARM_PREFETCH_BENEFICIAL(4,32,32),
false, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -930,7 +939,20 @@ const struct tune_params arm_cortex_v7m_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
- arm_cortex_v7m_branch_cost
+ arm_cortex_v7m_branch_cost,
+ arm_cortex_m_unroll_times
+};
+
+/* Tuning for the ARMv6-M Cortex cores (cortex-m0, cortex-m1). */
+const struct tune_params arm_cortex_v6m_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1, /* Constant limit. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ false, /* Prefer constant pool. */
+ arm_default_branch_cost,
+ arm_cortex_m_unroll_times
};
const struct tune_params arm_fa726te_tune =
@@ -940,7 +962,8 @@ const struct tune_params arm_fa726te_tune =
1, /* Constant limit. */
ARM_PREFETCH_NOT_BENEFICIAL,
true, /* Prefer constant pool. */
- arm_default_branch_cost
+ arm_default_branch_cost,
+ arm_default_unroll_times
};
@@ -2240,6 +2263,22 @@ arm_option_override (void)
&& current_tune->num_prefetch_slots > 0)
flag_prefetch_loop_arrays = 1;
+ /* Enable loop unrolling on Cortex-M0/M1/M3/M4 when optimizing (-O1 and above). */
+ if (optimize >=1
+ && (arm_selected_cpu->core == cortexm0 ||
+ arm_selected_cpu->core == cortexm1 ||
+ arm_selected_cpu->core == cortexm3 ||
+ arm_selected_cpu->core == cortexm4))
+ {
+ flag_unroll_loops = 1;
+
+ /* Set default loop unroll times. */
+ maybe_set_param_value (PARAM_MAX_UNROLL_TIMES,
+ current_tune->max_unroll_times(),
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ }
+
/* Set up parameters to be used in prefetching algorithm. Do not override the
defaults unless we are tuning for a core we have researched values for. */
if (current_tune->num_prefetch_slots > 0)
@@ -8511,6 +8550,18 @@ arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
}
static int
+arm_default_unroll_times (void)
+{
+ return PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+}
+
+static int
+arm_cortex_m_unroll_times (void)
+{
+ return 2;
+}
+
+static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
if (TARGET_32BIT)