From fe3292b28eea4f3f727b3840d6c270f3438a4310 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 4 Dec 2017 15:08:56 +0000 Subject: 2017-12-04 Richard Biener * tree-vectorizer.h (check_reduction_path): Declare. * tree-vect-loop.c (check_reduction_path): New function, split out from ... (vect_is_simple_reduction): ... here. * gimple-loop-interchange.cc: Include tree-vectorizer.h. (loop_cand::analyze_iloop_reduction_var): Use single_imm_use. Properly check for a supported reduction operation and a valid expression if the reduction covers multiple stmts. (prepare_perfect_loop_nest): Simpify allocation. (pass_linterchange::execute): Likewise. * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags. * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant. * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/gimple-linterchange@255383 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/gimple-loop-interchange.cc | 90 +++++---- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c | 2 +- .../gcc.dg/tree-ssa/loop-interchange-1b.c | 52 +++++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c | 2 +- gcc/testsuite/gfortran.dg/pr81303.f | 2 +- gcc/tree-vect-loop.c | 210 +++++++++++---------- gcc/tree-vectorizer.h | 3 + 7 files changed, 220 insertions(+), 141 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc index 21dbdec1c3a..1f46509a141 100644 --- a/gcc/gimple-loop-interchange.cc +++ b/gcc/gimple-loop-interchange.cc @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-ivopts.h" #include "tree-ssa-dce.h" #include "tree-data-ref.h" +#include "tree-vectorizer.h" /* This pass performs loop interchange: for example, the loop nest @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (tree var) in a way that reduction operation is seen as black box. In general, we can ignore reassociation of reduction operator; we can handle fake reductions in which VAR is not even used to compute NEXT. */ - FOR_EACH_IMM_USE_FAST (use_p, iterator, var) - { - stmt = USE_STMT (use_p); - if (is_gimple_debug (stmt)) - continue; - - if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) - return false; + if (! single_imm_use (var, &use_p, &single_use) + || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) + return false; - if (single_use != NULL) + /* Check the reduction operation. We require a left-associative operation. + For FP math we also need to be allowed to associate operations. */ + if (gassign *ass = dyn_cast (single_use)) + { + enum tree_code code = gimple_assign_rhs_code (ass); + if (! (associative_tree_code (code) + || (code == MINUS_EXPR + && use_p->use == gimple_assign_rhs1_ptr (ass))) + || (FLOAT_TYPE_P (TREE_TYPE (var)) + && ! flag_associative_math)) return false; - - single_use = stmt; } + else + return false; + /* Handle and verify a series of stmts feeding the reduction op. */ if (single_use != next_def - && !stmt_dominates_stmt_p (single_use, next_def)) + && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, + gimple_assign_rhs_code (single_use))) return false; /* Only support cases in which INIT is used in inner loop. */ @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *loop, vec *loop_nest, vec *datarefs, vec *ddrs) { struct loop *start_loop = NULL, *innermost = loop; - struct loop *outermost = superloop_at_depth (loop, 0); + struct loop *outermost = loops_for_fn (cfun)->tree_root; /* Find loop nest from the innermost loop. The outermost is the innermost outer*/ @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *loop, vec *loop_nest, if (!start_loop || !start_loop->inner) return false; + /* Prepare the data reference vector for the loop nest, pruning outer + loops we cannot handle. */ datarefs->create (20); - if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL + start_loop = prepare_data_references (start_loop, datarefs); + if (!start_loop /* Check if there is no data reference. */ || datarefs->is_empty () /* Check if there are too many of data references. */ - || (int) datarefs->length () > MAX_DATAREFS - /* Compute access strides for all data references. */ - || ((start_loop = compute_access_strides (start_loop, innermost, - *datarefs)) == NULL) - /* Check if loop nest should be interchanged. */ - || !should_interchange_loop_nest (start_loop, innermost, *datarefs)) - { - free_data_refs_with_aux (*datarefs); - return false; - } + || (int) datarefs->length () > MAX_DATAREFS) + return false; + + /* Compute access strides for all data references, pruning outer + loops we cannot analyze refs in. */ + start_loop = compute_access_strides (start_loop, innermost, *datarefs); + if (!start_loop) + return false; + + /* Check if any interchange is profitable in the loop nest. */ + if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) + return false; /* Check if data dependences can be computed for loop nest starting from start_loop. */ @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *loop, vec *loop_nest, return true; } + /* ??? We should be able to adjust DDRs we already computed by + truncating distance vectors. */ free_dependence_relations (*ddrs); + *ddrs = vNULL; /* Try to compute data dependences with the outermost loop stripped. */ loop = start_loop; start_loop = start_loop->inner; } while (start_loop && start_loop->inner); - loop_nest->release (); - free_data_refs_with_aux (*datarefs); return false; } @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fun) bool changed_p = false; struct loop *loop; - vec loop_nest; - vec datarefs; - vec ddrs; - FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) - if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) - { - tree_loop_interchange loop_interchange (loop_nest); - changed_p |= loop_interchange.interchange (datarefs, ddrs); - free_dependence_relations (ddrs); - free_data_refs_with_aux (datarefs); - loop_nest.release (); - } + { + vec loop_nest = vNULL; + vec datarefs = vNULL; + vec ddrs = vNULL; + if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) + { + tree_loop_interchange loop_interchange (loop_nest); + changed_p |= loop_interchange.interchange (datarefs, ddrs); + } + free_dependence_relations (ddrs); + free_data_refs_with_aux (datarefs); + loop_nest.release (); + } if (changed_p) scev_reset_htab (); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c index 0924c6f943d..8bd3ba78e73 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */ /* Copied from graphite/interchange-4.c */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c new file mode 100644 index 00000000000..f5f765bb902 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ + +/* Copied from graphite/interchange-4.c */ + +#define DEBUG 0 +#if DEBUG +#include +#endif + +double u[1782225]; + +static void __attribute__((noinline)) +foo (int N, double *res) +{ + int i, j; + double sum = 0; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum = sum + u[i + 1335 * j]; + + *res = sum; +} + +extern void abort (); + +int +main (void) +{ + int i, j; + double res; + + for (i = 0; i < 1782225; i++) + u[i] = 0; + u[0] = __DBL_MAX__; + u[1335] = -__DBL_MAX__; + u[1] = __DBL_MAX__; + u[1336] = -__DBL_MAX__; + + foo (1335, &res); + +#if DEBUG + fprintf (stderr, "res = %d \n", res); +#endif + + if (res != 0.0) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c index 80d022b73b2..a919a6c38bf 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c @@ -47,4 +47,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "Loop_pair is interchanged" 1 "linterchange"} } */ +/* { dg-final { scan-tree-dump-times "Loop_pair is interchanged" 1 "linterchange" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gfortran.dg/pr81303.f b/gcc/testsuite/gfortran.dg/pr81303.f index 842172b19d9..f83a859d8ff 100644 --- a/gcc/testsuite/gfortran.dg/pr81303.f +++ b/gcc/testsuite/gfortran.dg/pr81303.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -floop-interchange -fdump-tree-linterchange-details" } +! { dg-options "-O3 -ffast-math -floop-interchange -fdump-tree-linterchange-details" } subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm, $ nb,nx,ny,nz) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index d7669e4ebb7..6a2de9ae576 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi, } +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and + reduction operation CODE has a handled computation expression. */ + +bool +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg, + enum tree_code code) +{ + auto_vec > path; + auto_bitmap visited; + tree lookfor = PHI_RESULT (phi); + ssa_op_iter curri; + use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); + while (USE_FROM_PTR (curr) != loop_arg) + curr = op_iter_next_use (&curri); + curri.i = curri.numops; + do + { + path.safe_push (std::make_pair (curri, curr)); + tree use = USE_FROM_PTR (curr); + if (use == lookfor) + break; + gimple *def = SSA_NAME_DEF_STMT (use); + if (gimple_nop_p (def) + || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) + { +pop: + do + { + std::pair x = path.pop (); + curri = x.first; + curr = x.second; + do + curr = op_iter_next_use (&curri); + /* Skip already visited or non-SSA operands (from iterating + over PHI args). */ + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))); + } + while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); + if (curr == NULL_USE_OPERAND_P) + break; + } + else + { + if (gimple_code (def) == GIMPLE_PHI) + curr = op_iter_init_phiuse (&curri, as_a (def), SSA_OP_USE); + else + curr = op_iter_init_use (&curri, def, SSA_OP_USE); + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))) + curr = op_iter_next_use (&curri); + if (curr == NULL_USE_OPERAND_P) + goto pop; + } + } + while (1); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + dump_printf_loc (MSG_NOTE, loc, "reduction path: "); + unsigned i; + std::pair *x; + FOR_EACH_VEC_ELT (path, i, x) + { + dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); + dump_printf (MSG_NOTE, " "); + } + dump_printf (MSG_NOTE, "\n"); + } + + /* Check whether the reduction path detected is valid. */ + bool fail = path.length () == 0; + bool neg = false; + for (unsigned i = 1; i < path.length (); ++i) + { + gimple *use_stmt = USE_STMT (path[i].second); + tree op = USE_FROM_PTR (path[i].second); + if (! has_single_use (op) + || ! is_gimple_assign (use_stmt)) + { + fail = true; + break; + } + if (gimple_assign_rhs_code (use_stmt) != code) + { + if (code == PLUS_EXPR + && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + { + /* Track whether we negate the reduction value each iteration. */ + if (gimple_assign_rhs2 (use_stmt) == op) + neg = ! neg; + } + else + { + fail = true; + break; + } + } + } + return ! fail && ! neg; +} + + /* Function vect_is_simple_reduction (1) Detect a cross-iteration def-use cycle that represents a simple @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi, } /* Look for the expression computing loop_arg from loop PHI result. */ - auto_vec > path; - auto_bitmap visited; - tree lookfor = PHI_RESULT (phi); - ssa_op_iter curri; - use_operand_p curr = op_iter_init_phiuse (&curri, as_a (phi), - SSA_OP_USE); - while (USE_FROM_PTR (curr) != loop_arg) - curr = op_iter_next_use (&curri); - curri.i = curri.numops; - do - { - path.safe_push (std::make_pair (curri, curr)); - tree use = USE_FROM_PTR (curr); - if (use == lookfor) - break; - gimple *def = SSA_NAME_DEF_STMT (use); - if (gimple_nop_p (def) - || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) - { -pop: - do - { - std::pair x = path.pop (); - curri = x.first; - curr = x.second; - do - curr = op_iter_next_use (&curri); - /* Skip already visited or non-SSA operands (from iterating - over PHI args). */ - while (curr != NULL_USE_OPERAND_P - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME - || ! bitmap_set_bit (visited, - SSA_NAME_VERSION - (USE_FROM_PTR (curr))))); - } - while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); - if (curr == NULL_USE_OPERAND_P) - break; - } - else - { - if (gimple_code (def) == GIMPLE_PHI) - curr = op_iter_init_phiuse (&curri, as_a (def), SSA_OP_USE); - else - curr = op_iter_init_use (&curri, def, SSA_OP_USE); - while (curr != NULL_USE_OPERAND_P - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME - || ! bitmap_set_bit (visited, - SSA_NAME_VERSION - (USE_FROM_PTR (curr))))) - curr = op_iter_next_use (&curri); - if (curr == NULL_USE_OPERAND_P) - goto pop; - } - } - while (1); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - dump_printf_loc (MSG_NOTE, vect_location, - "reduction path: "); - unsigned i; - std::pair *x; - FOR_EACH_VEC_ELT (path, i, x) - { - dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); - dump_printf (MSG_NOTE, " "); - } - dump_printf (MSG_NOTE, "\n"); - } - - /* Check whether the reduction path detected is valid. */ - bool fail = path.length () == 0; - bool neg = false; - for (unsigned i = 1; i < path.length (); ++i) - { - gimple *use_stmt = USE_STMT (path[i].second); - tree op = USE_FROM_PTR (path[i].second); - if (! has_single_use (op) - || ! is_gimple_assign (use_stmt)) - { - fail = true; - break; - } - if (gimple_assign_rhs_code (use_stmt) != code) - { - if (code == PLUS_EXPR - && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) - { - /* Track whether we negate the reduction value each iteration. */ - if (gimple_assign_rhs2 (use_stmt) == op) - neg = ! neg; - } - else - { - fail = true; - break; - } - } - } - if (! fail && ! neg) + if (check_reduction_path (vect_location, loop, as_a (phi), loop_arg, + code)) return def_stmt; if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 06224f9f94d..56811e5e14b 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_vector_ref (gimple *, gimple_seq *, /* FORNOW: Used in tree-parloops.c. */ extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool *, bool); +/* Used in gimple-loop-interchange.c. */ +extern bool check_reduction_path (location_t, loop_p, gphi *, tree, + enum tree_code); /* Drive for loop analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); -- cgit v1.2.3