about summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2018-10-26 07:38:59 +0000
committer Richard Biener <rguenther@suse.de> 2018-10-26 07:38:59 +0000
commit c3bcda68fa1cf66735d9ce444d2a9eec7f571b42 (patch)
tree 9d618aa607519185f0bde590ce1b6b862c7ff3b5
parent 86e1e86d06e21aba1a8b8640877d05b335d82784 (diff)
2018-10-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* tree-vectorizer.h (_slp_tree::refcnt): New member.
* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor refcnt.
(vect_create_new_slp_node): Initialize refcnt to one.
(bst_traits): Move.
(scalar_stmts_set_t, bst_fail): Remove.
(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
(vect_build_slp_tree): Add bst_map argument and lookup already created SLP nodes.
(vect_print_slp_tree): Handle a SLP graph, print SLP node addresses.
(vect_slp_rearrange_stmts): Handle a SLP graph.
(vect_analyze_slp_instance): Adjust and free SLP nodes from the CSE map. Fix indenting.
(vect_schedule_slp_instance): Add short-cut.
* g++.dg/vect/slp-pr87105.cc: Adjust.
* gcc.dg/torture/20181024-1.c: New testcase.
* g++.dg/opt/20181025-1.C: Likewise.

git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@265522 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 19
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/g++.dg/opt/20181025-1.C 31
-rw-r--r-- gcc/testsuite/g++.dg/vect/slp-pr87105.cc 9
-rw-r--r-- gcc/testsuite/gcc.dg/torture/20181024-1.c 41
-rw-r--r-- gcc/tree-vect-slp.c 281
-rw-r--r-- gcc/tree-vectorizer.h 2
7 files changed, 263 insertions, 127 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b60988c910e..d46a3b47a55 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2018-10-26 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/87105
+ * tree-vectorizer.h (_slp_tree::refcnt): New member.
+ * tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
+ refcnt.
+ (vect_create_new_slp_node): Initialize refcnt to one.
+ (bst_traits): Move.
+ (scalar_stmts_set_t, bst_fail): Remove.
+ (vect_build_slp_tree_2): Add bst_map argument and adjust calls.
+ (vect_build_slp_tree): Add bst_map argument and lookup
+ already created SLP nodes.
+ (vect_print_slp_tree): Handle a SLP graph, print SLP node
+ addresses.
+ (vect_slp_rearrange_stmts): Handle a SLP graph.
+ (vect_analyze_slp_instance): Adjust and free SLP nodes from
+ the CSE map. Fix indenting.
+ (vect_schedule_slp_instance): Add short-cut.
+
2018-10-26 Martin Liska <mliska@suse.cz>
PR testsuite/86158
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 73739030993..36627afeaa3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,12 @@
2018-10-26 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/87105
+ * g++.dg/vect/slp-pr87105.cc: Adjust.
+ * gcc.dg/torture/20181024-1.c: New testcase.
+ * g++.dg/opt/20181025-1.C: Likewise.
+
+2018-10-26 Richard Biener <rguenther@suse.de>
+
PR testsuite/87754
* g++.dg/lto/odr-1_0.C: Fix pattern.
diff --git a/gcc/testsuite/g++.dg/opt/20181025-1.C b/gcc/testsuite/g++.dg/opt/20181025-1.C
new file mode 100644
index 00000000000..43d1614f023
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/20181025-1.C
@@ -0,0 +1,31 @@
+// { dg-do compile }
+// { dg-options "-Ofast" }
+
+template <typename Number>
+class Vector {
+ typedef Number value_type;
+ typedef const value_type *const_iterator;
+ Number norm_sqr () const;
+ const_iterator begin () const;
+ unsigned int dim;
+};
+template <typename Number>
+static inline Number
+local_sqr (const Number x)
+{
+ return x*x;
+}
+template <typename Number>
+Number
+Vector<Number>::norm_sqr () const
+{
+ Number sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
+ const_iterator ptr = begin(), eptr = ptr + (dim/4)*4;
+ while (ptr!=eptr)
+ {
+ sum0 += ::local_sqr(*ptr++);
+ sum1 += ::local_sqr(*ptr++);
+ }
+ return sum0+sum1+sum2+sum3;
+}
+template class Vector<double>;
diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
index 1023d915201..949b16c848f 100644
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
@@ -2,7 +2,7 @@
// { dg-require-effective-target c++11 }
// { dg-require-effective-target vect_double }
// For MIN/MAX recognition
-// { dg-additional-options "-ffast-math -fvect-cost-model" }
+// { dg-additional-options "-ffast-math" }
#include <algorithm>
#include <cmath>
@@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
// We should have if-converted everything down to straight-line code
// { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
-// We fail to elide an earlier store which makes us not handle a later
-// duplicate one for vectorization.
-// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } }
+// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } }
+// It's a bit awkward to detect that all stores were vectorized but the
+// following more or less does the trick
+// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" } }
diff --git a/gcc/testsuite/gcc.dg/torture/20181024-1.c b/gcc/testsuite/gcc.dg/torture/20181024-1.c
new file mode 100644
index 00000000000..f2cfe7f6d67
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/20181024-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
+
+typedef enum {
+ C = 0, N, S, E, W, T, B, NE, NW, SE, SW, NT, NB, ST, SB, ET, EB, WT, WB, FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
+typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES];
+void foo( LBM_Grid srcGrid )
+{
+ double ux , uy , uz , rho , ux1, uy1, uz1, rho1, ux2, uy2, uz2, rho2, u2, px, py;
+ int i;
+ for( i = 0;
+ i < (N_CELL_ENTRIES*( 100*100));
+ i += N_CELL_ENTRIES )
+ {
+ rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ + ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]);
+ rho = 2.0*rho1 - rho2;
+ px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0;
+ uz = 0.01 * (1.0-px*px) * (1.0-py*py);
+ u2 = 1.5 * (ux*ux + uy*uy + uz*uz);
+ (((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2);
+ (((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2);
+ }
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 3aae1776ef9..ab8504a10bd 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p)
int i;
slp_tree child;
+ if (--node->refcnt != 0)
+ return;
+
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_free_slp_tree (child, final_p);
@@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p)
free (node);
}
-
/* Free the memory allocated for the SLP instance. FINAL_P is true if we
have vectorized the instance or if we have made a final decision not
to vectorize the statements in any way. */
@@ -126,6 +128,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
+ node->refcnt = 1;
unsigned i;
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
@@ -1021,9 +1024,6 @@ bst_traits::equal (value_type existing, value_type candidate)
return true;
}
-typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
-static scalar_stmts_set_t *bst_fail;
-
typedef hash_map <vec <gimple *>, slp_tree,
simple_hashmap_traits <bst_traits, slp_tree> >
scalar_stmts_to_slp_tree_map_t;
@@ -1034,30 +1034,33 @@ vect_build_slp_tree_2 (vec_info *vinfo,
poly_uint64 *max_nunits,
vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size,
- unsigned max_tree_size);
+ unsigned max_tree_size,
+ scalar_stmts_to_slp_tree_map_t *bst_map);
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
vec<stmt_vec_info> stmts, unsigned int group_size,
poly_uint64 *max_nunits, vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size,
- unsigned max_tree_size)
+ unsigned max_tree_size,
+ scalar_stmts_to_slp_tree_map_t *bst_map)
{
- if (bst_fail->contains (stmts))
- return NULL;
- slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
- loads, matches, npermutes, tree_size,
- max_tree_size);
- /* When SLP build fails for stmts record this, otherwise SLP build
- can be exponential in time when we allow to construct parts from
- scalars, see PR81723. */
- if (! res)
+ if (slp_tree *leader = bst_map->get (stmts))
{
- vec <stmt_vec_info> x;
- x.create (stmts.length ());
- x.splice (stmts);
- bst_fail->add (x);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
+ *leader ? "" : "failed ", *leader);
+ if (*leader)
+ (*leader)->refcnt++;
+ return *leader;
}
+ slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
+ loads, matches, npermutes, tree_size,
+ max_tree_size, bst_map);
+ /* Keep a reference for the bst_map use. */
+ if (res)
+ res->refcnt++;
+ bst_map->put (stmts.copy (), res);
return res;
}
@@ -1074,7 +1077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
poly_uint64 *max_nunits,
vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size,
- unsigned max_tree_size)
+ unsigned max_tree_size,
+ scalar_stmts_to_slp_tree_map_t *bst_map)
{
unsigned nops, i, this_tree_size = 0;
poly_uint64 this_max_nunits = *max_nunits;
@@ -1205,7 +1209,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
group_size, &this_max_nunits,
&this_loads, matches, npermutes,
&this_tree_size,
- max_tree_size)) != NULL)
+ max_tree_size, bst_map)) != NULL)
{
/* If we have all children of child built up from scalars then just
throw that away and build it up this node from scalars. */
@@ -1348,7 +1352,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
group_size, &this_max_nunits,
&this_loads, tem, npermutes,
&this_tree_size,
- max_tree_size)) != NULL)
+ max_tree_size, bst_map)) != NULL)
{
/* ... so if successful we can apply the operand swapping
to the GIMPLE IL. This is necessary because for example
@@ -1441,21 +1445,37 @@ fail:
static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
- slp_tree node)
+ slp_tree node, hash_set<slp_tree> &visited)
{
int i;
stmt_vec_info stmt_info;
slp_tree child;
- dump_printf_loc (dump_kind, loc, "node%s\n",
+ if (visited.add (node))
+ return;
+
+ dump_printf_loc (dump_kind, loc, "node%s %p\n",
SLP_TREE_DEF_TYPE (node) != vect_internal_def
- ? " (external)" : "");
+ ? " (external)" : "", node);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
+ if (SLP_TREE_CHILDREN (node).is_empty ())
+ return;
+ dump_printf_loc (dump_kind, loc, "\tchildren");
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_print_slp_tree (dump_kind, loc, child);
+ dump_printf (dump_kind, " %p", (void *)child);
+ dump_printf (dump_kind, "\n");
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ vect_print_slp_tree (dump_kind, loc, child, visited);
}
+static void
+vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
+ slp_tree node)
+{
+ hash_set<slp_tree> visited;
+ vect_print_slp_tree (dump_kind, loc, node, visited);
+}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
@@ -1509,15 +1529,19 @@ vect_mark_slp_stmts_relevant (slp_tree node)
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
- vec<unsigned> permutation)
+ vec<unsigned> permutation,
+ hash_set<slp_tree> &visited)
{
stmt_vec_info stmt_info;
vec<stmt_vec_info> tmp_stmts;
unsigned int i;
slp_tree child;
+ if (visited.add (node))
+ return;
+
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_slp_rearrange_stmts (child, group_size, permutation);
+ vect_slp_rearrange_stmts (child, group_size, permutation, visited);
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
tmp_stmts.create (group_size);
@@ -1578,8 +1602,9 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
+ hash_set<slp_tree> visited;
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
- node->load_permutation);
+ node->load_permutation, visited);
/* We are done, no actual permutations need to be generated. */
poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
@@ -1889,12 +1914,18 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
bool *matches = XALLOCAVEC (bool, group_size);
unsigned npermutes = 0;
- bst_fail = new scalar_stmts_set_t ();
+ scalar_stmts_to_slp_tree_map_t *bst_map
+ = new scalar_stmts_to_slp_tree_map_t ();
poly_uint64 max_nunits = nunits;
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, &loads, matches, &npermutes,
- NULL, max_tree_size);
- delete bst_fail;
+ NULL, max_tree_size, bst_map);
+ /* The map keeps a reference on SLP nodes built, release that. */
+ for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
+ it != bst_map->end (); ++it)
+ if ((*it).second)
+ vect_free_slp_tree ((*it).second, false);
+ delete bst_map;
if (node != NULL)
{
/* Calculate the unrolling factor based on the smallest type. */
@@ -1924,109 +1955,109 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
else
{
- /* Create a new SLP instance. */
- new_instance = XNEW (struct _slp_instance);
- SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
- SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
- SLP_INSTANCE_LOADS (new_instance) = loads;
-
- /* Compute the load permutation. */
- slp_tree load_node;
- bool loads_permuted = false;
- FOR_EACH_VEC_ELT (loads, i, load_node)
- {
- vec<unsigned> load_permutation;
- int j;
- stmt_vec_info load_info;
- bool this_load_permuted = false;
- load_permutation.create (group_size);
- stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
- (SLP_TREE_SCALAR_STMTS (load_node)[0]);
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+ /* Create a new SLP instance. */
+ new_instance = XNEW (struct _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+ SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+ SLP_INSTANCE_LOADS (new_instance) = loads;
+
+ /* Compute the load permutation. */
+ slp_tree load_node;
+ bool loads_permuted = false;
+ FOR_EACH_VEC_ELT (loads, i, load_node)
{
- int load_place = vect_get_place_in_interleaving_chain
- (load_info, first_stmt_info);
- gcc_assert (load_place != -1);
- if (load_place != j)
- this_load_permuted = true;
- load_permutation.safe_push (load_place);
+ vec<unsigned> load_permutation;
+ int j;
+ stmt_vec_info load_info;
+ bool this_load_permuted = false;
+ load_permutation.create (group_size);
+ stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
+ (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+ {
+ int load_place = vect_get_place_in_interleaving_chain
+ (load_info, first_stmt_info);
+ gcc_assert (load_place != -1);
+ if (load_place != j)
+ this_load_permuted = true;
+ load_permutation.safe_push (load_place);
+ }
+ if (!this_load_permuted
+ /* The load requires permutation when unrolling exposes
+ a gap either because the group is larger than the SLP
+ group-size or because there is a gap between the groups. */
+ && (known_eq (unrolling_factor, 1U)
+ || (group_size == DR_GROUP_SIZE (first_stmt_info)
+ && DR_GROUP_GAP (first_stmt_info) == 0)))
+ {
+ load_permutation.release ();
+ continue;
+ }
+ SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
+ loads_permuted = true;
}
- if (!this_load_permuted
- /* The load requires permutation when unrolling exposes
- a gap either because the group is larger than the SLP
- group-size or because there is a gap between the groups. */
- && (known_eq (unrolling_factor, 1U)
- || (group_size == DR_GROUP_SIZE (first_stmt_info)
- && DR_GROUP_GAP (first_stmt_info) == 0)))
+
+ if (loads_permuted)
{
- load_permutation.release ();
- continue;
+ if (!vect_supported_load_permutation_p (new_instance))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: unsupported load "
+ "permutation %G", stmt_info->stmt);
+ vect_free_slp_instance (new_instance, false);
+ return false;
+ }
}
- SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
- loads_permuted = true;
- }
-
- if (loads_permuted)
- {
- if (!vect_supported_load_permutation_p (new_instance))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: unsupported load "
- "permutation %G", stmt_info->stmt);
- vect_free_slp_instance (new_instance, false);
- return false;
- }
- }
/* If the loads and stores can be handled with load/store-lan
- instructions do not generate this SLP instance. */
- if (is_a <loop_vec_info> (vinfo)
- && loads_permuted
- && dr && vect_store_lanes_supported (vectype, group_size, false))
- {
- slp_tree load_node;
- FOR_EACH_VEC_ELT (loads, i, load_node)
+ instructions do not generate this SLP instance. */
+ if (is_a <loop_vec_info> (vinfo)
+ && loads_permuted
+ && dr && vect_store_lanes_supported (vectype, group_size, false))
{
- stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
- (SLP_TREE_SCALAR_STMTS (load_node)[0]);
- /* Use SLP for strided accesses (or if we can't load-lanes). */
- if (STMT_VINFO_STRIDED_P (stmt_vinfo)
- || ! vect_load_lanes_supported
- (STMT_VINFO_VECTYPE (stmt_vinfo),
- DR_GROUP_SIZE (stmt_vinfo), false))
- break;
+ slp_tree load_node;
+ FOR_EACH_VEC_ELT (loads, i, load_node)
+ {
+ stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+ (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+ /* Use SLP for strided accesses (or if we can't load-lanes). */
+ if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+ || ! vect_load_lanes_supported
+ (STMT_VINFO_VECTYPE (stmt_vinfo),
+ DR_GROUP_SIZE (stmt_vinfo), false))
+ break;
+ }
+ if (i == loads.length ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Built SLP cancelled: can use "
+ "load/store-lanes\n");
+ vect_free_slp_instance (new_instance, false);
+ return false;
+ }
}
- if (i == loads.length ())
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ if (dump_enabled_p ())
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Built SLP cancelled: can use "
- "load/store-lanes\n");
- vect_free_slp_instance (new_instance, false);
- return false;
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance:\n");
+ vect_print_slp_tree (MSG_NOTE, vect_location, node);
}
- }
- vinfo->slp_instances.safe_push (new_instance);
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Final SLP tree for instance:\n");
- vect_print_slp_tree (MSG_NOTE, vect_location, node);
+ return true;
}
-
- return true;
- }
}
else
{
- /* Failed to SLP. */
- /* Free the allocated memory. */
- scalar_stmts.release ();
- loads.release ();
+ /* Failed to SLP. */
+ /* Free the allocated memory. */
+ scalar_stmts.release ();
+ loads.release ();
}
/* For basic block SLP, try to break the group up into multiples of the
@@ -3749,8 +3780,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
+ /* See if we have already vectorized the node in the graph of the
+ SLP instance. */
+ if (SLP_TREE_VEC_STMTS (node).exists ())
+ return;
+
/* See if we have already vectorized the same set of stmts and reuse their
- vectorized stmts. */
+ vectorized stmts across instances. */
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
{
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
@@ -3778,8 +3814,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
- if (!SLP_TREE_VEC_STMTS (node).exists ())
- SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 08d696a2f7c..e1292aa6eb6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -130,6 +130,8 @@ struct _slp_tree {
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
divided by vector size. */
unsigned int vec_stmts_size;
+ /* Reference count in the SLP graph. */
+ unsigned int refcnt;
/* Whether the scalar computations use two different operators. */
bool two_operators;
/* The DEF type of this node. */