diff options
author | Ira Rosen <irar@il.ibm.com> | 2010-08-08 10:28:29 +0000 |
---|---|---|
committer | Ira Rosen <irar@il.ibm.com> | 2010-08-08 10:28:29 +0000 |
commit | 857f6c13c818c4483d3153cbe1898e087020bfac (patch) | |
tree | 6da3a025cf6cb75533df8fc4f177053947fd4f49 | |
parent | c2e32a5627602a31e7dd8b87e61462ec69f092cc (diff) |
Add loop-nest versioning (-mloop-nest-version) that chooses at run time
between outer-loop and inner-loop vectorization, based on the target's
"double supported" builtin.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/st/cli-be-vect@162996 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/config/cil32/cil-builtins.def | 6 | ||||
-rw-r--r-- | gcc/config/cil32/cil32.c | 25 | ||||
-rw-r--r-- | gcc/config/cil32/cil32.opt | 4 | ||||
-rw-r--r-- | gcc/target-def.h | 4 | ||||
-rw-r--r-- | gcc/target.h | 3 | ||||
-rw-r--r-- | gcc/tree-vect-analyze.c | 20 | ||||
-rw-r--r-- | gcc/tree-vect-transform.c | 130 | ||||
-rw-r--r-- | gcc/tree-vectorizer.c | 2 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 3 |
9 files changed, 153 insertions, 44 deletions
diff --git a/gcc/config/cil32/cil-builtins.def b/gcc/config/cil32/cil-builtins.def index 0c92cd65d63..e0442ff7d09 100644 --- a/gcc/config/cil32/cil-builtins.def +++ b/gcc/config/cil32/cil-builtins.def @@ -1480,6 +1480,12 @@ DEF_CILBUILTIN(GEN_VQI_PACK, "[genvec_support]genvec_support.VQI::VQI_pack", \ VHI_type_node, VHI_type_node) +/* inner or outer loop? */ +DEF_CILBUILTIN(GCC_DOUBLE_SUPPORTED, "[genvec_support]genvec_support.VSI::VSI_double_supported", \ + ATTR_CONST_NOTHROW_LIST, \ + boolean_type_node, \ + 0) + /* V DF */ DEF_CILBUILTIN(GEN_VDF_CTOR, "[Xxxx.Simd]Xxxx.Simd.VecGenDF::.ctor", \ ATTR_CONST_NOTHROW_LIST, \ diff --git a/gcc/config/cil32/cil32.c b/gcc/config/cil32/cil32.c index 651e31e3fef..5e8af763140 100644 --- a/gcc/config/cil32/cil32.c +++ b/gcc/config/cil32/cil32.c @@ -101,6 +101,7 @@ static tree cil32_builtin_conversion (enum tree_code, tree); static tree cil32_builtin_interleave_high_low (enum tree_code, tree); static tree cil32_builtin_extract_even_odd (enum tree_code, tree); static tree cil32_builtin_pack (enum tree_code, tree); +static tree cil32_builtin_double_supported (void); /* Initialize the GCC target structure. 
*/ #undef TARGET_ATTRIBUTE_TABLE @@ -109,6 +110,9 @@ static tree cil32_builtin_pack (enum tree_code, tree); #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS cil_init_builtins +#undef TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED +#define TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED \ + cil32_builtin_double_supported #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START cil32_file_start @@ -724,8 +728,14 @@ static tree cil32_builtin_realign_load (tree type) } else { - if (element_size == 4) - return cil32_builtins[CIL32_GEN_VSF_REALIGN_LOAD]; + switch (element_size) + { + case 4: + return cil32_builtins[CIL32_GEN_VSF_REALIGN_LOAD]; + + case 8: + return cil32_builtins[CIL32_GEN_VDF_REALIGN_LOAD]; + } } return NULL_TREE; @@ -840,6 +850,9 @@ static tree cil32_builtin_build_init_vec (tree type) { switch (element_size) { + case 8: + return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VDI]; + case 4: return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VSI]; @@ -857,6 +870,9 @@ static tree cil32_builtin_build_init_vec (tree type) { switch (element_size) { + case 8: + return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VDF]; + case 4: return cil32_builtins[CIL32_GCC_BUILD_INIT_VEC_VSF]; @@ -1023,4 +1039,9 @@ cil32_builtin_pack (enum tree_code code, tree type) return NULL_TREE; } } + +static tree cil32_builtin_double_supported (void) +{ + return cil32_builtins[CIL32_GCC_DOUBLE_SUPPORTED]; +} diff --git a/gcc/config/cil32/cil32.opt b/gcc/config/cil32/cil32.opt index 82af68f3447..2d49e60d09d 100644 --- a/gcc/config/cil32/cil32.opt +++ b/gcc/config/cil32/cil32.opt @@ -99,3 +99,7 @@ Common Report Var(flag_peel_loop_bound_hints) Init(1) Optimization Generate peel loop bound hints +mloop-nest-version +Common Report Var(flag_loop_nest_version) Optimization +Apply versioning to choose which loop in the loop nest to vectorize + diff --git a/gcc/target-def.h b/gcc/target-def.h index 90eb2ea0d9d..9360f978dca 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -400,6 +400,7 @@ #define 
TARGET_VECTORIZE_BUILTIN_BUILD_INIT_VEC 0 #define TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN_ODD 0 #define TARGET_VECTORIZE_BUILTIN_PACK 0 +#define TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED 0 #define TARGET_VECTORIZE \ { \ @@ -427,7 +428,8 @@ TARGET_VECTORIZE_BUILTIN_BUILD_INIT_VEC, \ TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN_ODD, \ TARGET_VECTORIZE_BUILTIN_INTERLEAVE_HIGH_LOW, \ - TARGET_VECTORIZE_BUILTIN_PACK \ + TARGET_VECTORIZE_BUILTIN_PACK, \ + TARGET_VECTORIZE_BUILTIN_DOUBLE_SUPPORTED \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 diff --git a/gcc/target.h b/gcc/target.h index 6e40d3e82fe..2378698e915 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -507,6 +507,9 @@ struct gcc_target tree (* builtin_interleave_high_low) (enum tree_code, tree); tree (* builtin_pack) (enum tree_code, tree); + + tree (* builtin_double_supported) (void); + } vectorize; /* The initial value of target_flags. */ diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index 9fcfc66bda8..3a95bbb27e6 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -127,8 +127,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) unsigned int nunits; stmt_vec_info stmt_info; int i; - HOST_WIDE_INT dummy; tree biggest_type = NULL; + HOST_WIDE_INT lhs_size_unit, rhs_size_unit; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); @@ -154,6 +154,9 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); scalar_type = TREE_TYPE (PHI_RESULT (phi)); + if (scalar_type == double_type_node) + LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true; + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "get vectype for scalar type: "); @@ -245,12 +248,18 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } else { - gcc_assert (! 
STMT_VINFO_DATA_REF (stmt_info) && !is_pattern_stmt_p (stmt_info)); - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, - &dummy); + scalar_type = vect_get_smallest_scalar_type (stmt, &lhs_size_unit, + &rhs_size_unit); + + if (scalar_type == double_type_node + || (is_gimple_assign (stmt) + && TREE_TYPE (gimple_assign_rhs1 (stmt)) + == double_type_node)) + LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true; + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "get vectype for scalar type: "); @@ -3819,6 +3828,9 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo) /* Set vectype for STMT. */ scalar_type = TREE_TYPE (DR_REF (dr)); + if (scalar_type == double_type_node) + LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) = true; + STMT_VINFO_VECTYPE (stmt_info) = get_vectype_for_scalar_type (scalar_type); if (!STMT_VINFO_VECTYPE (stmt_info)) diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index c7bd06e1b89..f987a402846 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -8979,14 +8979,15 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, cost model initially. 
*/ static struct loop * -vect_loop_versioning (loop_vec_info loop_vinfo) +vect_loop_versioning (loop_vec_info loop_vinfo, + gimple_stmt_iterator *cond_exp_gsi, bool inner) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *nloop; tree cond_expr = NULL_TREE; gimple_seq cond_expr_stmt_list = NULL; basic_block condition_bb; - gimple_stmt_iterator gsi, cond_exp_gsi; + gimple_stmt_iterator gsi; basic_block merge_bb; basic_block new_exit_bb; edge new_exit_e, e; @@ -8997,46 +8998,80 @@ vect_loop_versioning (loop_vec_info loop_vinfo) tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); int min_profitable_iters = 0; unsigned int th; + tree builtin_decl = NULL_TREE; - if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)) - && !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo)) + if (inner) { - vect_create_cond_for_dr_align_checks (loop_vinfo, &cond_expr, - &cond_expr_stmt_list); - VEC_free (data_reference_p, heap, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)); + if (LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo) + && targetm.vectorize.builtin_double_supported + && (builtin_decl = targetm.vectorize.builtin_double_supported ())) + { + tree var = create_tmp_var (boolean_type_node, "double"); + gimple new_stmt = gimple_build_call (builtin_decl, 0); + tree tmp_cond_expr; + + add_referenced_var (var); + tmp_cond_expr = make_ssa_name (var, new_stmt); + gimple_call_set_lhs (new_stmt, tmp_cond_expr); + gimple_seq_add_stmt (&cond_expr_stmt_list, new_stmt); + + if (cond_expr) + { + cond_expr = + fold_build2 (TRUTH_AND_EXPR, boolean_type_node, cond_expr, tmp_cond_expr); + cond_expr = + force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE); + gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); + } + else + cond_expr = tmp_cond_expr; + } + + if (!cond_expr) + { + cond_expr = create_tmp_var (boolean_type_node, "dummy"); + } } else { - /* Get profitability threshold for vectorized loop. 
*/ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); + if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)) + && !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo) && !flag_bases_aligned) + { + vect_create_cond_for_dr_align_checks (loop_vinfo, &cond_expr, + &cond_expr_stmt_list); + VEC_free (data_reference_p, heap, + LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)); + } + else + { + /* Get profitability threshold for vectorized loop. */ + min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); + th = conservative_cost_threshold (loop_vinfo, + min_profitable_iters); - cond_expr = - fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, - build_int_cst (TREE_TYPE (scalar_loop_iters), th)); + cond_expr = + fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), th)); - cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list, - false, NULL_TREE); + cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list, + false, NULL_TREE); - if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) - vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, - &cond_expr_stmt_list); + if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) + vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, + &cond_expr_stmt_list); - if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))) - vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, - &cond_expr_stmt_list); - } - - cond_expr = - fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node); - cond_expr = - force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE); - gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); + if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))) + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, + &cond_expr_stmt_list); + } 
- //vect_mark_split_info_for_renaming (loop_vinfo); - //update_ssa (TODO_update_ssa); + cond_expr = + fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node); + cond_expr = + force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE); + gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); + } initialize_original_copy_tables (); nloop = loop_version (loop, cond_expr, &condition_bb, @@ -9072,8 +9107,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo) update_ssa (TODO_update_ssa); if (cond_expr_stmt_list) { - cond_exp_gsi = gsi_last_bb (condition_bb); - gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT); + *cond_exp_gsi = gsi_last_bb (condition_bb); + gsi_insert_seq_before (cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT); } return nloop; @@ -9241,15 +9276,18 @@ vect_transform_loop (loop_vec_info loop_vinfo) tree vf; gimple new_stmt; basic_block new_bb; + gimple_stmt_iterator cond_expr_gsi; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); - if (VEC_length (data_reference_p, LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)) + if (VEC_length (data_reference_p, + LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (loop_vinfo)) && !LOOP_VINFO_ALIGN_SCHEME (loop_vinfo) && !flag_bases_aligned && flag_alignment_hints) { - struct loop *new_loop = vect_loop_versioning (loop_vinfo); + struct loop *new_loop = vect_loop_versioning (loop_vinfo, &cond_expr_gsi, + false); loop_vec_info new_loop_vinfo = vect_analyze_loop (new_loop, true); new_loop->aux = new_loop_vinfo; @@ -9257,9 +9295,29 @@ vect_transform_loop (loop_vec_info loop_vinfo) vect_transform_loop (new_loop_vinfo); } + if (loop->inner && flag_loop_nest_version + && LOOP_VINFO_NEEDS_DOUBLE (loop_vinfo)) + { + struct loop *new_loop = vect_loop_versioning (loop_vinfo, &cond_expr_gsi, + true); + loop_vec_info inner_loop_vinfo; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== SWITCHING TO INNER LOOP ==="); + + 
inner_loop_vinfo = vect_analyze_loop (new_loop->inner, false); + new_loop->inner->aux = inner_loop_vinfo; + + if (inner_loop_vinfo && LOOP_VINFO_VECTORIZABLE_P (inner_loop_vinfo)) + vect_transform_loop (inner_loop_vinfo); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== SWITCHING BACK TO OUTER LOOP ==="); + } + if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))) - vect_loop_versioning (loop_vinfo); + vect_loop_versioning (loop_vinfo, &cond_expr_gsi, false); /* CHECKME: we wouldn't need this if we called update_ssa once for all loops. */ diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index eb73f00b703..aab139a8c44 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1971,7 +1971,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_VF (res) = NULL_TREE; LOOP_VINFO_ALIGN_SCHEME (res) = no_forced_scheme; LOOP_VINFO_DRS_FOR_ALIGN_CHECKS (res) = VEC_alloc (data_reference_p, heap, 10); - + LOOP_VINFO_NEEDS_DOUBLE (res) = false; return res; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f84683e7094..eac10e88177 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -264,6 +264,8 @@ typedef struct _loop_vec_info { VEC (data_reference_p, heap) *drs_for_alignment_checks; + /* Inner-outer loop versioning flags. */ + bool needs_double; } *loop_vec_info; /* Access Functions. */ @@ -292,6 +294,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_SPLIT_INFO(L) (L)->split_info #define LOOP_VINFO_ALIGN_SCHEME(L) (L)->align_scheme #define LOOP_VINFO_DRS_FOR_ALIGN_CHECKS(L) (L)-> drs_for_alignment_checks +#define LOOP_VINFO_NEEDS_DOUBLE(L) (L)->needs_double #define NITERS_KNOWN_P(n) \ (host_integerp ((n),0) \ |