aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ira Rosen <irar@il.ibm.com> 2010-08-08 10:28:29 +0000
committer Ira Rosen <irar@il.ibm.com> 2010-08-08 10:28:29 +0000
commit 857f6c13c818c4483d3153cbe1898e087020bfac (patch)
tree 6da3a025cf6cb75533df8fc4f177053947fd4f49
parent c2e32a5627602a31e7dd8b87e61462ec69f092cc (diff)
Add inner/outer loop vectorization versioning based on the "double
supported" builtin.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/st/cli-be-vect@162996 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/config/cil32/cil-builtins.def 6
-rw-r--r-- gcc/config/cil32/cil32.c 25
-rw-r--r-- gcc/config/cil32/cil32.opt 4
-rw-r--r-- gcc/target-def.h 4
-rw-r--r-- gcc/target.h 3
-rw-r--r-- gcc/tree-vect-analyze.c 20
-rw-r--r-- gcc/tree-vect-transform.c 130
-rw-r--r-- gcc/tree-vectorizer.c 2
-rw-r--r-- gcc/tree-vectorizer.h 3
9 files changed, 153 insertions, 44 deletions
diff --git a/gcc/config/cil32/cil-builtins.def b/gcc/config/cil32/cil-builtins.def
index 0c92cd65d63..e0442ff7d09 100644
--- a/gcc/config/cil32/cil-builtins.def
+++ b/gcc/config/cil32/cil-builtins.def
@@ -1480,6 +1480,12 @@ DEF_CILBUILTIN(GEN_VQI_PACK, "[genvec_support]genvec_support.VQI::VQI_pack", \
VHI_type_node, VHI_type_node)
+/* "Double supported" query; the vectorizer uses its result to choose between inner- and outer-loop vectorization.  */
+DEF_CILBUILTIN(GCC_DOUBLE_SUPPORTED, "[genvec_support]genvec_support.VSI::VSI_double_supported", \
+ ATTR_CONST_NOTHROW_LIST, \
+ boolean_type_node, \
+ 0)
+
/* V DF */
DEF_CILBUILTIN(GEN_VDF_CTOR, "[Xxxx.Simd]Xxxx.Simd.VecGenDF::.ctor", \
ATTR_CONST_NOTHROW_LIST, \
diff --git a/gcc/config/cil32/cil32.c b/gcc/config/cil32/cil32.c
index 651e31e3fef..5e8af763140 100644
--- a/gcc/config/cil32/cil32.c
+++ b/gcc/config/cil32/cil32.c
@@ -101,6 +101,7 @@ static tree cil32_builtin_conversion (enum tree_code, tree);
static tree cil32_builtin_interleave_high_low (enum tree_code, tree);
static tree cil32_builtin_extract_even_odd (enum tree_code, tree);
static tree cil32_builtin_pack (enum tree_code, tree);
+static tree cil32_builtin_double_supported (void);
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
@@ -109,6 +110,9 @@ static tree cil32_builtin_pack (enum tree_code, tree);
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS cil_init_builtins
+#undef TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED
+#define TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED \
+ cil32_builtin_double_supported
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START cil32_file_start
@@ -724,8 +728,14 @@ static tree cil32_builtin_realign_load (tree type)
}
else
{
- if (element_size == 4)
- return cil32_builtins[CIL32_GEN_VSF_REALIGN_LOAD];
+ switch (element_size)
+ {
+ case 4:
+ return cil32_builtins[CIL32_GEN_VSF_REALIGN_LOAD];
+
+ case 8:
+ return cil32_builtins[CIL32_GEN_VDF_REALIGN_LOAD];
+ }
}
return NULL_TREE;
@@ -840,6 +850,9 @@ static tree cil32_builtin_build_init_vec (tree type)
{
switch (element_size)
{
+ case 8:
+ return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VDI];
+
case 4:
return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VSI];
@@ -857,6 +870,9 @@ static tree cil32_builtin_build_init_vec (tree type)
{
switch (element_size)
{
+ case 8:
+ return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VDF];
+
case 4:
return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VSF];
@@ -1023,4 +1039,9 @@ cil32_builtin_pack (enum tree_code code, tree type)
return NULL_TREE;
}
}
+
+static tree cil32_builtin_double_supported (void) /* Hook installed as TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED.  */
+{
+ return cil32_builtins[CIL32_GCC_DOUBLE_SUPPORTED]; /* Table entry for the "double supported" builtin.  */
+}
diff --git a/gcc/config/cil32/cil32.opt b/gcc/config/cil32/cil32.opt
index 82af68f3447..2d49e60d09d 100644
--- a/gcc/config/cil32/cil32.opt
+++ b/gcc/config/cil32/cil32.opt
@@ -99,3 +99,7 @@ Common Report Var(flag_peel_loop_bound_hints) Init(1) Optimization
Generate peel loop bound hints
+mloop-nest-version
+Common Report Var(flag_loop_nest_version) Optimization
+Apply versioning to choose which loop in the loop nest to vectorize
+
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 90eb2ea0d9d..9360f978dca 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -400,6 +400,7 @@
#define TARGET_VECTORIZE_BUILTIN_BUILD_INIT_VEC 0
#define TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN_ODD 0
#define TARGET_VECTORIZE_BUILTIN_PACK 0
+#define TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED 0
#define TARGET_VECTORIZE \
{ \
@@ -427,7 +428,8 @@
TARGET_VECTORIZE_BUILTIN_BUILD_INIT_VEC, \
TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN_ODD, \
TARGET_VECTORIZE_BUILTIN_INTERLEAVE_HIGH_LOW, \
- TARGET_VECTORIZE_BUILTIN_PACK \
+ TARGET_VECTORIZE_BUILTIN_PACK, \
+ TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED \
}
#define TARGET_DEFAULT_TARGET_FLAGS 0
diff --git a/gcc/target.h b/gcc/target.h
index 6e40d3e82fe..2378698e915 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -507,6 +507,9 @@ struct gcc_target
tree (* builtin_interleave_high_low) (enum tree_code, tree);
tree (* builtin_pack) (enum tree_code, tree);
+
+ tree (* builtin_double_supported) (void);
+
} vectorize;
/* The initial value of target_flags. */
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index 9fcfc66bda8..3a95bbb27e6 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -127,8 +127,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
unsigned int nunits;
stmt_vec_info stmt_info;
int i;
- HOST_WIDE_INT dummy;
tree biggest_type = NULL;
+ HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -154,6 +154,9 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
scalar_type = TREE_TYPE (PHI_RESULT (phi));
+ if (scalar_type == double_type_node)
+ LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true;
+
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "get vectype for scalar type: ");
@@ -245,12 +248,18 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
}
else
{
-
gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
&& !is_pattern_stmt_p (stmt_info));
- scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
- &dummy);
+ scalar_type = vect_get_smallest_scalar_type (stmt, &lhs_size_unit,
+ &rhs_size_unit);
+
+ if (scalar_type == double_type_node
+ || (is_gimple_assign (stmt)
+ && TREE_TYPE (gimple_assign_rhs1 (stmt))
+ == double_type_node))
+ LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true;
+
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "get vectype for scalar type: ");
@@ -3819,6 +3828,9 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
/* Set vectype for STMT. */
scalar_type = TREE_TYPE (DR_REF (dr));
+ if (scalar_type == double_type_node)
+ LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true;
+
STMT_VINFO_VECTYPE (stmt_info) =
get_vectype_for_scalar_type (scalar_type);
if (!STMT_VINFO_VECTYPE (stmt_info))
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index c7bd06e1b89..f987a402846 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -8979,14 +8979,15 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
cost model initially. */
static struct loop *
-vect_loop_versioning (loop_vec_info loop_vinfo)
+vect_loop_versioning (loop_vec_info loop_vinfo,
+ gimple_stmt_iterator *cond_exp_gsi, bool inner)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *nloop;
tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL;
basic_block condition_bb;
- gimple_stmt_iterator gsi, cond_exp_gsi;
+ gimple_stmt_iterator gsi;
basic_block merge_bb;
basic_block new_exit_bb;
edge new_exit_e, e;
@@ -8997,46 +8998,80 @@ vect_loop_versioning (loop_vec_info loop_vinfo)
tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
int min_profitable_iters = 0;
unsigned int th;
+ tree builtin_decl = NULL_TREE;
- if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo))
- && !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo))
+ if (inner)
{
- vect_create_cond_for_dr_align_checks (loop_vinfo, &cond_expr,
- &cond_expr_stmt_list);
- VEC_free (data_reference_p, heap, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo));
+ if (LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo)
+ && targetm.vectorize.builtin_double_supported
+ && (builtin_decl = targetm.vectorize.builtin_double_supported ()))
+ {
+ tree var = create_tmp_var (boolean_type_node, "double");
+ gimple new_stmt = gimple_build_call (builtin_decl, 0);
+ tree tmp_cond_expr;
+
+ add_referenced_var (var);
+ tmp_cond_expr = make_ssa_name (var, new_stmt);
+ gimple_call_set_lhs (new_stmt, tmp_cond_expr);
+ gimple_seq_add_stmt (&cond_expr_stmt_list, new_stmt);
+
+ if (cond_expr)
+ {
+ cond_expr =
+ fold_build2 (TRUTH_AND_EXPR, boolean_type_node, cond_expr, tmp_cond_expr);
+ cond_expr =
+ force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
+ gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
+ }
+ else
+ cond_expr = tmp_cond_expr;
+ }
+
+ if (!cond_expr)
+ {
+ cond_expr = create_tmp_var (boolean_type_node, "dummy");
+ }
}
else
{
- /* Get profitability threshold for vectorized loop. */
- min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
+ if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo))
+ && !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo) && !flag_bases_aligned)
+ {
+ vect_create_cond_for_dr_align_checks (loop_vinfo, &cond_expr,
+ &cond_expr_stmt_list);
+ VEC_free (data_reference_p, heap,
+ LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo));
+ }
+ else
+ {
+ /* Get profitability threshold for vectorized loop. */
+ min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
- th = conservative_cost_threshold (loop_vinfo,
- min_profitable_iters);
+ th = conservative_cost_threshold (loop_vinfo,
+ min_profitable_iters);
- cond_expr =
- fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
- build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+ cond_expr =
+ fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
+ build_int_cst (TREE_TYPE (scalar_loop_iters), th));
- cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
- false, NULL_TREE);
+ cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
+ false, NULL_TREE);
- if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
- vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
- &cond_expr_stmt_list);
+ if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+ vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
+ &cond_expr_stmt_list);
- if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
- vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
- &cond_expr_stmt_list);
- }
-
- cond_expr =
- fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
- cond_expr =
- force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
- gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
+ if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
+ &cond_expr_stmt_list);
+ }
- //vect_mark_split_info_for_renaming (loop_vinfo);
- //update_ssa (TODO_update_ssa);
+ cond_expr =
+ fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
+ cond_expr =
+ force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
+ gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
+ }
initialize_original_copy_tables ();
nloop = loop_version (loop, cond_expr, &condition_bb,
@@ -9072,8 +9107,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo)
update_ssa (TODO_update_ssa);
if (cond_expr_stmt_list)
{
- cond_exp_gsi = gsi_last_bb (condition_bb);
- gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
+ *cond_exp_gsi = gsi_last_bb (condition_bb);
+ gsi_insert_seq_before (cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
}
return nloop;
@@ -9241,15 +9276,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree vf;
gimple new_stmt;
basic_block new_bb;
+ gimple_stmt_iterator cond_expr_gsi;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
- if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo))
+ if (VEC_length (data_reference_p,
+ LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo))
&& !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo) && !flag_bases_aligned
&& flag_alignment_hints)
{
- struct loop *new_loop = vect_loop_versioning (loop_vinfo);
+ struct loop *new_loop = vect_loop_versioning (loop_vinfo, &cond_expr_gsi,
+ false);
loop_vec_info new_loop_vinfo = vect_analyze_loop (new_loop, true);
new_loop->aux = new_loop_vinfo;
@@ -9257,9 +9295,29 @@ vect_transform_loop (loop_vec_info loop_vinfo)
vect_transform_loop (new_loop_vinfo);
}
+ if (loop->inner && flag_loop_nest_version
+ && LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo))
+ {
+ struct loop *new_loop = vect_loop_versioning (loop_vinfo, &cond_expr_gsi,
+ true);
+ loop_vec_info inner_loop_vinfo;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== SWITCHING TO INNER LOOP ===");
+
+ inner_loop_vinfo = vect_analyze_loop (new_loop->inner, false);
+ new_loop->inner->aux = inner_loop_vinfo;
+
+ if (inner_loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (inner_loop_vinfo))
+ vect_transform_loop (inner_loop_vinfo);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== SWITCHING BACK TO OUTER LOOP ===");
+ }
+
if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
|| VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
- vect_loop_versioning (loop_vinfo);
+ vect_loop_versioning (loop_vinfo, &cond_expr_gsi, false);
/* CHECKME: we wouldn't need this if we called update_ssa once
for all loops. */
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index eb73f00b703..aab139a8c44 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1971,7 +1971,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_VF (res) = NULL_TREE;
LOOP_VINFO_ALIGN_SCHEME (res) = no_forced_scheme;
LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (res) = VEC_alloc (data_reference_p, heap, 10);
-
+ LOOP_VINFO_NEEDS_DOUBLE (res) = false;
return res;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f84683e7094..eac10e88177 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -264,6 +264,8 @@ typedef struct _loop_vec_info {
VEC (data_reference_p, heap) *drs_for_alignment_checks;
+ /* Inner-outer loop versioning flags. */
+ bool needs_double;
} *loop_vec_info;
/* Access Functions. */
@@ -292,6 +294,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_SPLIT_INFO(L) (L)->split_info
#define LOOP_VINFO_ALIGN_SCHEME(L) (L)->align_scheme
#define LOOP_VINFO_DRS_FOR_ALIGN_CHECKS(L) (L)-> drs_for_alignment_checks
+#define LOOP_VINFO_NEEDS_DOUBLE(L) (L)->needs_double
#define NITERS_KNOWN_P(n) \
(host_integerp ((n),0) \