diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2012-03-02 14:51:58 +0000 |
---|---|---|
committer | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2012-03-02 14:51:58 +0000 |
commit | 799a03735e2a9b4a1491a0c015874ffd07c29b60 (patch) | |
tree | 7e32214d7fbd818b4df47552dbb3fa714a88ca1f | |
parent | d533f32bc267a4d411792bda2df41103e832e4a9 (diff) |
2012-03-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Ira Rosen <irar@il.ibm.com>
PR tree-optimization/50031
PR tree-optimization/50969
* targhooks.c (default_builtin_vectorization_cost): Handle
vec_promote_demote.
* target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
* tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
all types of reduction and pattern statements.
(vect_estimate_min_profitable_iters): Likewise.
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
(vect_model_store_cost): Use vec_perm rather than vector_stmt for
statement cost.
(vect_model_load_cost): Likewise.
(vect_get_load_cost): Likewise; add dump logic for explicit realigns.
(vectorizable_type_demotion): Call vect_model_promotion_demotion_cost.
(vectorizable_type_promotion): Likewise.
* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
vec_promote_demote.
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
vec_perm for VSX and handle vec_promote_demote.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@184787 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 24 |
-rw-r--r-- | gcc/config/i386/i386.c | 3 |
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 13 |
-rw-r--r-- | gcc/config/spu/spu.c | 1 |
-rw-r--r-- | gcc/target.h | 3 |
-rw-r--r-- | gcc/targhooks.c | 1 |
-rw-r--r-- | gcc/tree-vect-loop.c | 13 |
-rw-r--r-- | gcc/tree-vect-stmts.c | 60 |
8 files changed, 107 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4828be18f2f..22a1359dd96 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2012-03-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + Ira Rosen <irar@il.ibm.com> + + PR tree-optimization/50031 + PR tree-optimization/50969 + * targhooks.c (default_builtin_vectorization_cost): Handle + vec_promote_demote. + * target.h (enum vect_cost_for_stmt): Add vec_promote_demote. + * tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle + all types of reduction and pattern statements. + (vect_estimate_min_profitable_iters): Likewise. + * tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function. + (vect_model_store_cost): Use vec_perm rather than vector_stmt for + statement cost. + (vect_model_load_cost): Likewise. + (vect_get_load_cost): Likewise; add dump logic for explicit realigns. + (vectorizable_type_demotion): Call vect_model_promotion_demotion_cost. + (vectorizable_type_promotion): Likewise. + * config/spu/spu.c (spu_builtin_vectorization_cost): Handle + vec_promote_demote. + * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. + * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update + vec_perm for VSX and handle vec_promote_demote. + 2012-03-01 Jakub Jelinek <jakub@redhat.com> * BASE-VER: Set to 4.6.4. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2ee8df99bde..a58a8cac2a5 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -32823,7 +32823,8 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: - return 1; + case vec_promote_demote: + return ix86_cost->vec_stmt_cost; default: gcc_unreachable (); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2f2f342e792..742ec237aae 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3695,12 +3695,23 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case vec_to_scalar: case scalar_to_vec: case cond_branch_not_taken: - case vec_perm: return 1; case cond_branch_taken: return 3; + case vec_perm: + if (TARGET_VSX) + return 4; + else + return 1; + + case vec_promote_demote: + if (TARGET_VSX) + return 5; + else + return 1; + case unaligned_load: if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) { diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index 8901162a341..dffca84b040 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -6794,6 +6794,7 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case scalar_to_vec: case cond_branch_not_taken: case vec_perm: + case vec_promote_demote: return 1; case scalar_store: diff --git a/gcc/target.h b/gcc/target.h index eaf7aadd707..5ccd7fd67a3 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -128,7 +128,8 @@ enum vect_cost_for_stmt scalar_to_vec, cond_branch_not_taken, cond_branch_taken, - vec_perm + vec_perm, + vec_promote_demote }; /* Sets of optimization levels at which an option may be enabled by diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 225831b9515..c1bd118179c 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -529,6 +529,7 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case scalar_to_vec: case cond_branch_not_taken: case 
vec_perm: + case vec_promote_demote: return 1; case unaligned_load: diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 44c1ecddd1d..dd9aef4174f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2104,7 +2104,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo) if (stmt_info && !STMT_VINFO_RELEVANT_P (stmt_info) && (!STMT_VINFO_LIVE_P (stmt_info) - || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)) + || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !STMT_VINFO_IN_PATTERN_P (stmt_info)) continue; if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) @@ -2251,11 +2252,19 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) { gimple stmt = gsi_stmt (si); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + { + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + } + /* Skip stmts that are not vectorized inside the loop. */ if (!STMT_VINFO_RELEVANT_P (stmt_info) && (!STMT_VINFO_LIVE_P (stmt_info) - || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)) + || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))) continue; + vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor; /* FIXME: for stmts in the inner-loop in outer-loop vectorization, some of the "outside" costs are generated inside the outer-loop. */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b5ecd3f24ee..7a263785e9d 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -623,6 +623,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, } +/* Model cost for type demotion and promotion operations. PWR is normally + zero for single-step promotions and demotions. It will be one if + two-step promotion/demotion is required, and so on. Each additional + step doubles the number of instructions required. 
*/ + +static void +vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, + enum vect_def_type *dt, int pwr) +{ + int i, tmp; + int inside_cost = 0, outside_cost = 0, single_stmt_cost; + + /* The SLP costs were already calculated during SLP tree build. */ + if (PURE_SLP_STMT (stmt_info)) + return; + + single_stmt_cost = vect_get_stmt_cost (vec_promote_demote); + for (i = 0; i < pwr + 1; i++) + { + tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ? + (i + 1) : i; + inside_cost += vect_pow2 (tmp) * single_stmt_cost; + } + + /* FORNOW: Assuming maximum 2 args per stmts. */ + for (i = 0; i < 2; i++) + { + if (dt[i] == vect_constant_def || dt[i] == vect_external_def) + outside_cost += vect_get_stmt_cost (vector_stmt); + } + + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, " + "outside_cost = %d .", inside_cost, outside_cost); + + /* Set the costs in STMT_INFO. */ + stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost); + stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost); +} + /* Function vect_cost_strided_group_size For strided load or store, return the group_size only if it is the first @@ -691,7 +731,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, { /* Uses a high and low interleave operation for each needed permute. */ inside_cost = ncopies * exact_log2(group_size) * group_size - * vect_get_stmt_cost (vector_stmt); + * vect_get_stmt_cost (vec_perm); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .", @@ -795,7 +835,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) { /* Uses an even and odd extract operations for each needed permute. 
*/ inside_cost = ncopies * exact_log2(group_size) * group_size - * vect_get_stmt_cost (vector_stmt); + * vect_get_stmt_cost (vec_perm); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .", @@ -855,7 +895,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, case dr_explicit_realign: { *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load) - + vect_get_stmt_cost (vector_stmt)); + + vect_get_stmt_cost (vec_perm)); /* FIXME: If the misalignment remains fixed across the iterations of the containing loop, the following cost should be added to the @@ -863,6 +903,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, if (targetm.vectorize.builtin_mask_for_load) *inside_cost += vect_get_stmt_cost (vector_stmt); + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "vect_model_load_cost: explicit realign"); + break; } case dr_explicit_realign_optimized: @@ -886,7 +929,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, } *inside_cost += ncopies * (vect_get_stmt_cost (vector_load) - + vect_get_stmt_cost (vector_stmt)); + + vect_get_stmt_cost (vec_perm)); + + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, + "vect_model_load_cost: explicit realign optimized"); + break; } @@ -2919,7 +2967,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_demotion ==="); - vect_model_simple_cost (stmt_info, ncopies, dt, NULL); + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); return true; } @@ -3217,7 +3265,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_promotion ==="); - vect_model_simple_cost (stmt_info, 2*ncopies, dt, 
NULL); + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); return true; } |