author     Sameera Deshpande <sameera.deshpande@imgtec.com>  2016-07-11 08:01:08 +0000
committer  Sameera Deshpande <sameera.deshpande@imgtec.com>  2016-07-11 08:01:08 +0000
commit     dcc9a1360cd68f08cea0e608d6069a555639e1ce (patch)
tree       37ba12cf0d964aac2ae6d7ef808bf4642164dc5c
parent     96875826ebbeae406f21ed458e9779e43d2e806d (diff)
Add new pass to perform autovectorization using unified representation

The current GCC framework does not give a complete overview of the loop to be vectorized: it either breaks the loop across its body, or across iterations. Because of this, the existing data structures cannot be reused for our approach, which gathers all the information about the loop body in one place using primitive permute operations. Hence, define new data structures and populate them, and add support for vectorization of LOAD/STORE instructions:
  a. Create the permute order tree for the loop with LOAD and STORE instructions on single- or multi-dimensional arrays and aggregates within nested loops.

This change adds a new pass to perform autovectorization using the unified representation, defines the new data structures needed for it, and creates the primitive reorder tree for LOAD/STORE instructions within the loop.

The whole loop is represented using an ITER_NODE, which holds information about
  - the preparatory statements to be executed before entering the loop (like initialization of vectors, prepping for reduction operations, peeling, etc.),
  - the vectorizable loop body, represented as a PRIMOP_TREE (primitive reordering tree),
  - the final statements (for peeling, variable loop bound, COLLAPSE operation for reduction, etc.), and
  - other loop attributes (loop bound, peeling needed, dependences, etc.).

Memory accesses within a loop have a definite repetitive pattern which can be captured using primitive permute operators, which in turn determine the desired permute order for the vector computations. The PRIMOP_TREE is an AST which records all computations and permutations required to store the destination vector into contiguous memory at the end of all iterations of the loop. It can have INTERLEAVE, CONCAT, EXTRACT, SPLIT, ITER or any compute operation as an intermediate node. Leaf nodes can be a memory reference, a constant, or a vector of loop invariants. Depending upon the operation, the PRIMOP_TREE holds the information about the statement within the loop which is necessary for vectorization.

At this stage, these data structures are populated by gathering all the information about the loop, the statements within it, and the correlation between those statements. Moreover, the loop body is analyzed to check whether vectorization of each statement is possible. Note, however, that this analysis phase gives a worst-case estimate of instruction selection, as it only checks whether a specific named pattern is defined in the target's .md file. It does not necessarily give the optimal cover, which is the aim of the transformation phase using the tree tiling algorithm, and which can be invoked only once the loop body is represented using the primitive reorder tree.

At this stage, the focus is to create the permute order tree for loops with LOAD and STORE instructions only. The code we intend to compile is of the form

  FOR (i = 0; i < N; i++)
    {
      stmt 1: D[k*i + d_1] = S_1[k*i + c_11]
      stmt 2: D[k*i + d_2] = S_1[k*i + c_21]
      ...
      stmt k: D[k*i + d_k] = S_1[k*i + c_k1]
    }

Here we assume that any data reference can be represented as base + k * index + offset (the data structure struct data_reference from GCC is currently used for this purpose). If not, the address is normalized to convert it to such a representation.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/unified-autovect@238205 138bc75d-0d04-0410-961f-82ee72b054a4
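As an illustrative sketch only (this example is editorial and not part of the commit), a loop of the above form with k = 2 and a single source array looks as follows in C; the two strided stores to D form one interleaved group that the primitive reorder tree would model as an INTERLEAVE node over MEMREF leaves:

  /* Hypothetical example: D, S1 and N are placeholders; here k = 2,
     d_1 = 0, d_2 = 1, c_11 = 1, c_21 = 0.  */
  void
  copy_interleaved (int *D, const int *S1, int N)
  {
    for (int i = 0; i < N; i++)
      {
        D[2 * i + 0] = S1[2 * i + 1];   /* stmt 1 */
        D[2 * i + 1] = S1[2 * i + 0];   /* stmt 2 */
      }
  }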
-rw-r--r--  gcc/ChangeLog              71
-rw-r--r--  gcc/Makefile.in             1
-rw-r--r--  gcc/common.opt              4
-rw-r--r--  gcc/hsa.c                   1
-rw-r--r--  gcc/passes.def              5
-rw-r--r--  gcc/tree-data-ref.h         3
-rw-r--r--  gcc/tree-pass.h             1
-rw-r--r--  gcc/tree-ssa-loop.c         3
-rw-r--r--  gcc/tree-vect-data-refs.c  63
-rw-r--r--  gcc/tree-vect-loop.c        2
-rw-r--r--  gcc/tree-vect-unified.c  2585
-rw-r--r--  gcc/tree-vect-unified.h   291
-rw-r--r--  gcc/tree-vectorizer.c       4
-rw-r--r--  gcc/tree-vectorizer.h      11
14 files changed, 3021 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index afd022ed7f3..4dd80966bdc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,74 @@
+2016-07-08 Sameera Deshpande <sameera.deshpande@imgtec.com>
+
+ * Makefile.in: Add tree-vect-unified.o in OBJS.
+ * common.opt (ftree-loop-vectorize-unified): New option.
+ * hsa.c (x_flag_tree_loop_vectorize_unified): Initialize.
+ * passes.def (pass_unified_vectorize): New pass.
+ * tree-pass.h (make_pass_unified_vectorize): New function declaration.
+ * tree-ssa-loop.c (gate): Change gate condition for tree loop
+ vectorization.
+ * tree-vect-data-refs.c (vect_mark_for_runtime_alias_test): Split
+ (vect_mark_for_runtime_alias_test_1): Into this.
+ (vect_mark_for_runtime_alias_test): And this.
+ (vect_check_gather_scatter): Split.
+ (vect_check_gather_scatter_1): Into this.
+ (vect_check_gather_scatter): And this.
+ * tree-vect-loop.c (vect_is_simple_iv_evolution): Export.
+ * tree-vect-unified.c: New file to add loop vectorization using unified
+ representation.
+ (pass_data_unified_vectorize): New pass data.
+ (pass_unified_vectorize): New pass.
+ (new_iter_node): New function.
+ (destroy_iter_node_info): Likewise.
+ (new_stmt_attr): Likewise.
+ (vect_populate_iter_node_from_loop): Likewise.
+ (vect_analyze_dataref_access): Likewise.
+ (vect_analyze_dataref_accesses): Likewise.
+ (vect_analyze_datarefs): Likewise.
+ (is_raw_dependence): Likewise.
+ (vect_analyze_dataref_dependence): Likewise.
+ (vect_analyze_dataref_dependences): Likewise.
+ (mark_addr_index_stmt): Likewise.
+ (classify_loop_stmt): Likewise.
+ (classify_loop_stmts): Likewise.
+ (mark_probable_root_nodes): Likewise.
+ (vect_is_simple_use): Likewise.
+ (normalize_base_addr): Likewise.
+ (init_primop_node): Likewise.
+ (populate_prim_node): Likewise.
+ (exists_primTree_with_memref): Likewise.
+ (create_primTree_memref): Likewise.
+ (create_primTree_combine): Likewise.
+ (create_primTree_partition): Likewise.
+ (add_child_at_index): Likewise.
+ (get_child_at_index): Likewise.
+ (vectorizable_store): Likewise.
+ (vectorizable_load): Likewise.
+ (analyze_and_create_ptree): Likewise.
+ (is_ptree_complete): Likewise.
+ (create_ptree): Likewise.
+ (vect_analyze_loop_with_prim_tree_2): Likewise.
+ (pretty_print_gimple_vec): Likewise.
+ (pp_primop_tree): Likewise.
+ (pretty_print_ptree_vec): Likewise.
+ (pretty_print_iter_node): Likewise.
+ (dump_iter_node): Likewise.
+ (vectorize_loops_using_uniop): Likewise.
+ * tree-vect-unified.h: New header file.
+ (struct ITER_node): New data structure.
+ (struct stmt_attr): Likewise.
+ (struct primop_tree): Likewise.
+ (enum primop_code): New enum.
+ (stmt_use_type): Likewise.
+ * tree-vectorizer.h:
+ (vect_mark_for_runtime_alias_test_1): Declare.
+ (vect_check_gather_scatter_1): Likewise.
+ (vect_analyze_loop_form_1): Likewise.
+ (vect_is_simple_iv_evolution): Likewise.
+ * tree-vectorizer.c:
+ (vect_loop_vectorized_call): Extern.
+ (fold_loop_vectorized_call): Likewise.
+
2016-07-08 Martin Liska <mliska@suse.cz>
PR middle-end/71606
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 5e7422da0c0..07c3a0ac2e8 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1528,6 +1528,7 @@ OBJS = \
tree-vect-loop-manip.o \
tree-vect-slp.o \
tree-vectorizer.o \
+ tree-vect-unified.o \
tree-vrp.o \
tree.o \
valtrack.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index a7c5125bf08..c64f5f6a392 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2582,6 +2582,10 @@ ftree-loop-vectorize
Common Report Var(flag_tree_loop_vectorize) Optimization
Enable loop vectorization on trees.
+ftree-loop-vectorize-unified
+Common Report Var(flag_tree_loop_vectorize_unified) Optimization
+Enable loop vectorization using unified representation on trees.
+
ftree-slp-vectorize
Common Report Var(flag_tree_slp_vectorize) Optimization
Enable basic block vectorization (SLP) on trees.
diff --git a/gcc/hsa.c b/gcc/hsa.c
index ff978702489..1c0953945d2 100644
--- a/gcc/hsa.c
+++ b/gcc/hsa.c
@@ -840,6 +840,7 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
fn_opts = optimization_default_node;
fn_opts = copy_node (fn_opts);
TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false;
+ TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize_unified = false;
TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false;
DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts;
diff --git a/gcc/passes.def b/gcc/passes.def
index 3647e90ddfe..ca056813e6c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -278,6 +278,11 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_expand_omp_ssa);
NEXT_PASS (pass_ch_vect);
NEXT_PASS (pass_if_conversion);
+ NEXT_PASS (pass_unified_vectorize);
+ PUSH_INSERT_PASSES_WITHIN (pass_unified_vectorize)
+ NEXT_PASS (pass_dce);
+ POP_INSERT_PASSES ()
+
/* pass_vectorize must immediately follow pass_if_conversion.
Please do not add any other passes in between. */
NEXT_PASS (pass_vectorize);
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 856dd58f3ad..2bb75bb0d6b 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -341,7 +341,8 @@ extern bool dr_may_alias_p (const struct data_reference *,
const struct data_reference *, bool);
extern bool dr_equal_offsets_p (struct data_reference *,
struct data_reference *);
-
+extern bool memref_analyze_innermost (gimple *, tree, struct loop *, tree *,
+ tree *, tree *, tree *, tree *);
/* Return true when the base objects of data references A and B are
the same memory object. */
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 36299a61a5b..9aadbf0616f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -375,6 +375,7 @@ extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_unified_vectorize (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_distribution (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_vectorize (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_simduid_cleanup (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 06354e39e78..ff92a4b3523 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -398,7 +398,8 @@ public:
/* opt_pass methods: */
virtual bool gate (function *fun)
{
- return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
+ return ((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
+ && !flag_tree_loop_vectorize_unified);
}
virtual unsigned int execute (function *);
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 6fddd3a1c68..4171f658340 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -136,16 +136,9 @@ vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
return scalar_type;
}
-
-/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
- tested at run-time. Return TRUE if DDR was successfully inserted.
- Return false if versioning is not supported. */
-
-static bool
-vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
+bool
+vect_mark_for_runtime_alias_test_1 (ddr_p ddr, loop *loop)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-
if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
return false;
@@ -189,11 +182,28 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
return false;
}
- LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
return true;
}
+
+/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
+ tested at run-time. Return TRUE if DDR was successfully inserted.
+ Return false if versioning is not supported. */
+
+static bool
+vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ bool is_alias;
+
+ is_alias = vect_mark_for_runtime_alias_test_1 (ddr, loop);
+ if (is_alias)
+ LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
+ return is_alias;
+}
+
+
/* Function vect_analyze_data_ref_dependence.
Return TRUE if there (might) exist a dependence between a memory-reference
@@ -3199,16 +3209,35 @@ bool
vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
gather_scatter_info *info)
{
- HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ tree base;
+ bool is_read;
+ tree vec_type;
+
+ base = DR_REF (dr);
+ is_read = DR_IS_READ (dr);
+ vec_type = STMT_VINFO_VECTYPE (stmt_info);
+
+ return vect_check_gather_scatter_1 (stmt, base, is_read, loop, vec_type,
+ basep, offp, scalep);
+}
+
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+ or scatter store and if so, return a builtin decl for that operation. */
+
+tree
+vect_check_gather_scatter_1 (gimple *stmt, tree base, bool is_read,
+ struct loop *loop, tree vec_type, tree *basep,
+ tree *offp, int *scalep)
+{
+ HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
tree offtype = NULL_TREE;
- tree decl, base, off;
+ tree decl, off;
machine_mode pmode;
int punsignedp, reversep, pvolatilep = 0;
- base = DR_REF (dr);
/* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
see if we can use the def stmt of the address. */
if (is_gimple_call (stmt)
@@ -3389,12 +3418,10 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
if (offtype == NULL_TREE)
offtype = TREE_TYPE (off);
- if (DR_IS_READ (dr))
- decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
+ if (is_read)
+ decl = targetm.vectorize.builtin_gather (vec_type, offtype, scale);
else
- decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
- offtype, scale);
+ decl = targetm.vectorize.builtin_scatter (vec_type, offtype, scale);
if (decl == NULL_TREE)
return false;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 63c002e737c..00318a7f651 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -704,7 +704,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
FORNOW: A simple evolution of an induction variables in the loop is
considered a polynomial evolution. */
-static bool
+bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
tree * step)
{
diff --git a/gcc/tree-vect-unified.c b/gcc/tree-vect-unified.c
new file mode 100644
index 00000000000..2014f319c9e
--- /dev/null
+++ b/gcc/tree-vect-unified.c
@@ -0,0 +1,2585 @@
+/* Loop Vectorization using unified representation.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Loop autovectorization using unified representation for permute
+ instructions. */
+#if 1
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-cfg.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
+#include "tree-scalar-evolution.h"
+#include "tree-vect-unified.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "target.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "optabs-tree.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop.h"
+#include "expr.h"
+#include "builtins.h"
+#include "params.h"
+#include "pretty-print.h"
+
+/* Entry point to the autovectorizer using tree tiling algorithm for permute
+ instruction selection using unified representation. */
+
+namespace {
+
+const pass_data pass_data_unified_vectorize =
+{
+ GIMPLE_PASS, /* type */
+ "unified-vect", /* name */
+ OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
+ TV_TREE_VECTORIZATION, /* tv_id */
+ ( PROP_cfg | PROP_ssa ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_unified_vectorize : public gimple_opt_pass
+{
+public:
+ pass_unified_vectorize (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_unified_vectorize, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *fun)
+ {
+ return (flag_tree_loop_vectorize && flag_tree_loop_vectorize_unified)
+ || fun->has_force_vectorize_loops;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_unified_vectorize
+
+unsigned int
+pass_unified_vectorize::execute (function *fun)
+{
+ if (number_of_loops (fun) <= 1)
+ return 0;
+
+ return vectorize_loops_using_uniop ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_unified_vectorize (gcc::context *ctxt)
+{
+ return new pass_unified_vectorize (ctxt);
+}
+
+/* Function new_iter_node.
+
+ Create new ITER_node for the loop LOOP, and return the pointer. */
+
+static struct ITER_node *
+new_iter_node (struct loop *loop)
+{
+ struct ITER_node *res;
+ basic_block *bbs;
+ gimple_stmt_iterator si;
+ int i;
+
+ res = (struct ITER_node *) xcalloc (1, sizeof (struct ITER_node));
+ ITER_NODE_LOOP (res) = loop;
+ ITER_NODE_NITERS (res) = NULL;
+ ITER_NODE_PROLOGUE (res) = vNULL;
+ ITER_NODE_LOOP_PEEL_NEEDED (res) = 0;
+ ITER_NODE_LOOP_BODY (res) = vNULL;
+ ITER_NODE_PEELING_FOR_GAPS (res) = false;
+ ITER_NODE_PEELING_FOR_NITER (res) = false;
+ ITER_NODE_EPILOGUE (res) = vNULL;
+
+ bbs = get_loop_body (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+
+ for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *phi = gsi_stmt (si);
+ gimple_set_uid (phi, 0);
+ }
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *stmt = gsi_stmt (si);
+ gimple_set_uid (stmt, 0);
+ }
+ }
+
+ return res;
+}
+
+/* Function destroy_iter_node_info.
+
+ Clear the statements within the loop corresponding to the ITER_node INODE, and
+ destroy ITER_node. */
+
+static void
+destroy_iter_node_info (struct ITER_node *inode)
+{
+ int i;
+ basic_block *bbs;
+ gimple_stmt_iterator si;
+
+ if (ITER_NODE_PROLOGUE (inode) != vNULL)
+ ITER_NODE_PROLOGUE (inode).release ();
+ if (ITER_NODE_LOOP_BODY (inode) != vNULL)
+ ITER_NODE_LOOP_BODY (inode).release ();
+ if (ITER_NODE_EPILOGUE (inode) != vNULL)
+ ITER_NODE_EPILOGUE (inode).release ();
+
+ bbs = get_loop_body (ITER_NODE_LOOP (inode));
+ for (i = 0; i < ITER_NODE_LOOP (inode)->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+
+ for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *phi = gsi_stmt (si);
+ gimple_set_uid (phi, 0);
+ }
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *stmt = gsi_stmt (si);
+ gimple_set_uid (stmt, 0);
+ }
+ }
+
+ free (inode);
+}
+
+/* Function new_stmt_attr.
+
+ Create statement attribute information, and return a pointer to it. */
+
+static struct stmt_attr *
+new_stmt_attr ()
+{
+ struct stmt_attr *info;
+ info = (struct stmt_attr *) xcalloc (1, sizeof (struct stmt_attr));
+ info->use_type = stmt_use_type_undef;
+ info->access_fn = NULL;
+ info->ptree = NULL;
+ info->dr = NULL;
+ info->probable_root = false;
+ info->vectype = NULL;
+ return info;
+}
+
+/* Function vect_populate_iter_node_from_loop.
+
+ Create new ITER_node corresponding to loop LOOP, and fill all fields in
+ ITER_node necessary for auto-vectorization. */
+
+static struct ITER_node *
+vect_populate_iter_node_from_loop (struct loop *loop)
+{
+ tree number_of_iterations, number_of_iterationsm1;
+ basic_block *bbs;
+ gcond *loop_cond, *inner_loop_cond = NULL;
+ int i;
+ gimple_stmt_iterator si;
+
+ if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
+ &number_of_iterations, &inner_loop_cond))
+ return NULL;
+
+ struct ITER_node * t_iter_node = new_iter_node (loop);
+ ITER_NODE_NITERS (t_iter_node) = number_of_iterations;
+
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+
+ for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *phi = gsi_stmt (si);
+ set_stmt_attr (phi, new_stmt_attr ());
+ }
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple *stmt = gsi_stmt (si);
+ set_stmt_attr (stmt, new_stmt_attr ());
+ }
+ }
+
+ if (!ITER_NODE_NITERS_KNOWN_P (t_iter_node))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Symbolic number of iterations is ");
+ dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+
+ STMT_ATTR_USE_TYPE (loop_cond) = stmt_use_type_loop_exit_ctrl;
+ if (inner_loop_cond)
+ STMT_ATTR_USE_TYPE (inner_loop_cond) = stmt_use_type_loop_exit_ctrl;
+
+ gcc_assert (!loop->aux);
+ loop->aux = t_iter_node;
+ return t_iter_node;
+}
+
+/* Function vect_analyze_dataref_access.
+
+ Analyze the access pattern of data reference DR within the LOOP. Any access
+ pattern except a variable stride is supported. */
+
+static bool
+vect_analyze_dataref_access (struct data_reference *dr, struct loop * loop)
+{
+ tree step = DR_STEP (dr);
+ gimple *stmt = DR_STMT (dr);
+
+ if (!step)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data-ref access in loop\n");
+ return false;
+ }
+
+ if (integer_zerop (step))
+ {
+ /* Stride is 0, i.e. the data_ref is loop invariant. So, writes cannot be
+ vectorized. */
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "zero step in inner loop of nest\n");
+
+ if (!loop->force_vectorize)
+ return false;
+ }
+
+ return DR_IS_READ (dr);
+ }
+
+ if (loop && nested_in_vect_loop_p (loop, stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "grouped access in outer loop.\n");
+ return false;
+ }
+
+ /* For now, do not vectorize for variable step size. */
+ if (TREE_CODE (step) != INTEGER_CST)
+ return false;
+
+ return true;
+}
+
+/* Function vect_analyze_dataref_accesses.
+
+ Analyze all the data refs within the LOOP for the access pattern. */
+
+static bool
+vect_analyze_dataref_accesses (struct ITER_node *inode)
+{
+ unsigned int i;
+ vec<data_reference_p> datarefs = ITER_NODE_DATA_REFS (inode);
+ struct data_reference *dr;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_analyze_dataref_accesses ===\n");
+
+ if (datarefs.is_empty ())
+ return true;
+
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ if (!vect_analyze_dataref_access (dr, ITER_NODE_LOOP (inode)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: complicated access pattern.\n");
+
+ return false;
+ }
+
+ return true;
+}
+
+/* Function vect_analyze_datarefs.
+
+ Analyze all data references DR within the LOOP to check if vectorization is
+ possible. */
+
+static bool
+vect_analyze_datarefs (struct ITER_node *inode)
+{
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ vec<data_reference_p> datarefs = ITER_NODE_DATA_REFS (inode);
+ tree scalar_type;
+ tree vec_type;
+ struct data_reference *dr;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ {
+ gimple *stmt;
+ //tree base, offset, init;
+ bool simd_lane_access = false;
+ int vf;
+
+
+again:
+ if (!dr || !DR_REF (dr))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unhandled data-ref\n");
+ return false;
+ }
+
+ stmt = DR_STMT (dr);
+
+ /* Discard clobbers from the dataref vector. We will remove
+ clobber stmts during vectorization. */
+ if (gimple_clobber_p (stmt))
+ {
+ free_data_ref (dr);
+ if (i == datarefs.length () - 1)
+ {
+ datarefs.pop ();
+ break;
+ }
+ datarefs.ordered_remove (i);
+ dr = datarefs[i];
+ goto again;
+ }
+
+ /* Check that analysis of the data-ref succeeded. */
+ if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
+ || !DR_STEP (dr))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: data ref analysis "
+ "failed ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+
+ if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: base addr of dr is a "
+ "constant\n");
+
+ return false;
+ }
+
+ if (TREE_THIS_VOLATILE (DR_REF (dr)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: volatile type ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+
+ if (stmt_can_throw_internal (stmt))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: statement can throw an "
+ "exception ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+
+ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
+ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: statement is bitfield "
+ "access ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+
+ if (is_gimple_call (stmt)
+ && (!gimple_call_internal_p (stmt)
+ || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
+ && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: dr in a call ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+
+ /* If the dataref is in an inner-loop of the loop that is considered for
+ vectorization, we also want to analyze the access relative to
+ the outer-loop (DR contains information only relative to the
+ inner-most enclosing loop). We do that by building a reference to the
+ first location accessed by the inner-loop, and analyze it relative to
+ the outer-loop. */
+ if (loop && nested_in_vect_loop_p (loop, stmt))
+ {
+ /* Do nothing for now, as the purpose is unclear. */
+#if 0
+ /* Build a reference to the first location accessed by the
+ inner-loop: *(BASE+INIT). (The first location is actually
+ BASE+INIT+OFFSET, but we add OFFSET separately later). */
+ tree inner_base = build_fold_indirect_ref
+ (fold_build_pointer_plus (base, init));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "analyze in outer-loop: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
+ &poffset, &pmode, &punsignedp,
+ &preversep, &pvolatilep, false);
+ gcc_assert (outer_base != NULL_TREE);
+
+ if (pbitpos % BITS_PER_UNIT != 0)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "failed: bit offset alignment.\n");
+ return false;
+ }
+
+ if (preversep)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "failed: reverse storage order.\n");
+ return false;
+ }
+
+ outer_base = build_fold_addr_expr (outer_base);
+ if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
+ &base_iv, false))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "failed: evolution of base is not affine.\n");
+ return false;
+ }
+
+ if (offset)
+ {
+ if (poffset)
+ poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
+ poffset);
+ else
+ poffset = offset;
+ }
+
+ if (!poffset)
+ {
+ offset_iv.base = ssize_int (0);
+ offset_iv.step = ssize_int (0);
+ }
+ else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
+ &offset_iv, false))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "evolution of offset is not affine.\n");
+ return false;
+ }
+
+ outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
+ split_constant_offset (base_iv.base, &base_iv.base, &dinit);
+ outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
+ split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
+ outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
+
+ outer_step = size_binop (PLUS_EXPR,
+ fold_convert (ssizetype, base_iv.step),
+ fold_convert (ssizetype, offset_iv.step));
+#endif
+ }
+
+ STMT_ATTR_DR (stmt) = dr;
+
+ if (simd_lane_access)
+ {
+ free_data_ref (datarefs[i]);
+ datarefs[i] = dr;
+ }
+
+ /* Set vectype for STMT. */
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ vec_type = get_vectype_for_scalar_type (scalar_type);
+ if (!vec_type)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: no vectype for stmt: ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
+ scalar_type);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ return false;
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "got vectype for stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_generic_expr (MSG_NOTE, TDF_SLIM,
+ vec_type);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+
+ /* Adjust the minimal vectorization factor according to the
+ vector type. */
+ vf = TYPE_VECTOR_SUBPARTS (vec_type);
+
+ if (TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
+ {
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: not suitable for strided "
+ "load ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/* Function is_raw_dependence.
+
+ Helper function to check if there exists a read-after-write dependence between
+ data references DRA and DRB. */
+
+static bool
+is_raw_dependence (struct data_reference *dra, struct data_reference *drb)
+{
+ gimple *earlier;
+ struct data_reference *earlier_ref;
+
+ /* Identify first reference, and check if read-after-write condition. */
+ earlier = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
+ if (earlier)
+ {
+ if (earlier == DR_STMT (dra))
+ earlier_ref = dra;
+ else
+ earlier_ref = drb;
+
+ if (DR_IS_WRITE (earlier_ref))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "READ_WRITE dependence in interleaving."
+ "\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Function vect_analyze_dataref_dependence.
+
+ Return TRUE if there exists a dependence between the data references DRA and
+ DRB in DDR that prevents vectorization; return FALSE otherwise. */
+
+static bool
+vect_analyze_dataref_dependence (struct data_dependence_relation *ddr,
+ struct ITER_node *inode)
+{
+ unsigned int i;
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ struct data_reference *dra = DDR_A (ddr);
+ struct data_reference *drb = DDR_B (ddr);
+ lambda_vector dist_v;
+ unsigned int loop_depth;
+
+ /* Independent data accesses. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ return false;
+
+ if (dra == drb
+ || (DR_IS_READ (dra) && DR_IS_READ (drb)))
+ return false;
+
+ /* Even if we have an anti-dependence then, as the vectorized loop covers at
+ least two scalar iterations, there is always also a true dependence.
+ As the vectorizer does not re-order loads and stores we can ignore
+ the anti-dependence if TBAA can disambiguate both DRs similar to the
+ case with known negative distance anti-dependences (positive
+ distance anti-dependences would violate TBAA constraints). */
+ if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
+ || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
+ && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
+ get_alias_set (DR_REF (drb))))
+ return false;
+
+ /* Unknown data dependence. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ {
+ /* If user asserted safelen consecutive iterations can be
+ executed concurrently, assume independence. */
+ if (loop->safelen >= 2)
+ {
+ return false;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "versioning for alias required: "
+ "can't determine dependence between ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ DR_REF (dra));
+ dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ DR_REF (drb));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ /* Add to list of ddrs that need to be tested at run-time. */
+ return !vect_mark_for_runtime_alias_test_1 (ddr, loop);
+ }
+
+ /* Known data dependence. */
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
+ {
+ /* If user asserted safelen consecutive iterations can be
+ executed concurrently, assume independence. */
+ if (loop->safelen >= 2)
+ {
+ return false;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "versioning for alias required: "
+ "bad dist vector for ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
+ dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ /* Add to list of ddrs that need to be tested at run-time. */
+ return !vect_mark_for_runtime_alias_test_1 (ddr, loop);
+ }
+
+ loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ {
+ int dist = dist_v[loop_depth];
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "dependence distance = %d.\n", dist);
+
+ if (dist == 0)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "dependence distance == 0 between ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+
+ if (is_raw_dependence (dra, drb))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "READ_WRITE dependence in interleaving."
+ "\n");
+ return true;
+ }
+
+ continue;
+ }
+
+ if (dist > 0 && DDR_REVERSED_P (ddr))
+ {
+ /* If DDR_REVERSED_P the order of the data-refs in DDR was
+ reversed (to make distance vector positive), and the actual
+ distance is negative. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "dependence distance negative.\n");
+ continue;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized, possible dependence "
+ "between data-refs ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Function vect_analyze_dataref_dependences.
+
+ Examine all the data references within the LOOP, and make sure there does not
+ exist any data dependence between them. */
+
+static bool
+vect_analyze_dataref_dependences (struct ITER_node *inode,
+ vec<loop_p> loop_nest)
+{
+ unsigned int i;
+ struct data_dependence_relation *ddr;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_analyze_data_ref_dependences ===\n");
+
+ ITER_NODE_DATA_DEPS (inode).create (ITER_NODE_DATA_REFS (inode).length ()
+ * ITER_NODE_DATA_REFS (inode).length ());
+ if (!compute_all_dependences (ITER_NODE_DATA_REFS (inode),
+ &ITER_NODE_DATA_DEPS (inode),
+ loop_nest, true))
+ return false;
+
+ FOR_EACH_VEC_ELT (ITER_NODE_DATA_DEPS (inode), i, ddr)
+ if (vect_analyze_dataref_dependence (ddr, inode))
+ return false;
+
+ return true;
+}
+
+/* Function mark_addr_index_stmt.
+
+ This function marks the statements involved in address computation within the
+ loop, which need not be vectorized, as scalars. Currently the need for this
+ function is unclear, so it does nothing. */
+
+static void
+mark_addr_index_stmt (tree use, struct loop *loop)
+{
+ return;
+}
+
+/* Function classify_loop_stmt.
+
+ The computations within the loop can be classified as :
+ - Induction : Following form where v1 is loop invariant.
+
+ var_phi = PHI (var_loop, var_inv)
+ :
+ var_loop = op (var_phi, v1)
+
+ - Reduction : Following form where reduction variable var_loop or reduction
+ component var_phi has single use. v1 can be any vectorizable
+ statement.
+
+ var_phi = PHI (var_loop, var_inv)
+ :
+ var_loop = op (var_phi, v1)
+ :
+ var_out_phi = PHI (var_loop)
+
+ - Intermediate : Intermediate computations defining ssa_vars which are not
+ live beyond the loop. The operands can be loop invariant/
+ results of computations within the loop/PHI nodes/constant/
+ memory.
+
+ - Loop invariant : Constant/memory references/ssa_vars with computations
+ outside the loop. There is no redefinition of components
+ of computations within the loop.
+
+ - Scalar : The statements which contribute in address computations or other
+ accesses which need not be vectorized.
+
+ - Complex : Not vectorizable.
+
+ classify_loop_stmt ():
+ - The loop statement with all operands on RHS as loop invariant (constants or
+ ssa_vars defined outside loop) are marked as loop invariant. RO memory can
+ be marked as loop invariant. Currently, RW memory is not marked as loop
+ invariant.
+
+ - The loop statement which is not vectorizable is marked as complex, and
+ vectorization is terminated.
+
+ - All other loop statements which have non-PHI nodes as output are marked as
+ intermediate.
+
+ - If any PHI node, which is neither reduction nor induction variable,
+ vectorization is terminated.
+*/
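+/* Editorial example, not part of the original patch: for a loop such as
+
+     for (i = 0; i < n; i++)
+       a[i] = b[i] + c;
+
+   the PHI node for i is classified as induction, c (defined outside the
+   loop) as loop invariant, the load of b[i] and the addition as
+   intermediate, and the store to a[i], being a memory def, is the
+   probable root from which the recursive classification starts.  */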
+
+static bool
+classify_loop_stmt (gimple *stmt, struct loop * loop)
+{
+ char *attr_name[] = {"Undefined", "Scalar", "loop_invariant", "induction",
+ "reduction", "intermediate", "complex",
+ "loop_exit_control"};
+ enum stmt_use_type stmt_type, def_type;
+ use_operand_p use_p;
+ ssa_op_iter iter;
+
+ if (gimple_has_volatile_ops (stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: stmt has volatile operand\n");
+ return false;
+ }
+
+ if (STMT_ATTR_USE_TYPE (stmt) != stmt_use_type_undef
+ && STMT_ATTR_USE_TYPE (stmt) != stmt_use_type_scalar)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, " classify_loop_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " Preprocessed : %s\n",
+ attr_name[STMT_ATTR_USE_TYPE (stmt)]);
+ }
+ return (STMT_ATTR_USE_TYPE (stmt) != stmt_use_type_complex);
+ }
+
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, " classify_loop_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " PHI : %s\n",
+ attr_name[STMT_ATTR_USE_TYPE (stmt)]);
+ }
+ return true;
+ }
+
+ /* The statement lies outside the loop, so no need to analyze the statement
+ further. */
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+ {
+ STMT_ATTR_USE_TYPE (stmt) = stmt_use_type_loop_invariant;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, " classify_loop_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " Statement outside the loop under consideration.\n");
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " : %s\n",
+ attr_name[STMT_ATTR_USE_TYPE (stmt)]);
+ }
+ return true;
+ }
+
+ /* If DR in STMT, there is LOAD/STORE operation. Mark the instructions
+ computing address for indexing as non-vectorizable/scalar. */
+ if (STMT_ATTR_DR (stmt) && gimple_assign_single_p (stmt))
+ {
+ tree scalar_type;
+ tree op0 = gimple_assign_lhs (stmt);
+ tree op1 = gimple_assign_rhs1 (stmt);
+ enum tree_code code;
+
+ if (TREE_CODE (op0) == SSA_NAME)
+ {
+ code = TREE_CODE (op1);
+ if (code == ARRAY_REF
+ || code == BIT_FIELD_REF
+ || code == INDIRECT_REF
+ || code == COMPONENT_REF
+ || code == IMAGPART_EXPR
+ || code == REALPART_EXPR
+ || code == MEM_REF
+ || TREE_CODE_CLASS (code) == tcc_declaration)
+ {
+ /* LOAD - trace SSA_NAME for address if any. Mark it as scalar if
+ not marked already. For loads, no need to analyze uses. */
+ mark_addr_index_stmt (TREE_OPERAND (op1, 0), loop);
+ STMT_ATTR_USE_TYPE (stmt) = stmt_use_type_intermediate;
+ scalar_type = TREE_TYPE (DR_REF (STMT_ATTR_DR (stmt)));
+ STMT_ATTR_VECTYPE (stmt) =
+ get_vectype_for_scalar_type (scalar_type);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " classify_loop_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " : %s\n",
+ attr_name[STMT_ATTR_USE_TYPE (stmt)]);
+ }
+ return true;
+ }
+ /* TODO: Conversion needs to be handled here. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Expected load.\n");
+ return false;
+ }
+ else
+ {
+ code = TREE_CODE (op0);
+ if (code == ARRAY_REF
+ || code == BIT_FIELD_REF
+ || code == INDIRECT_REF
+ || code == COMPONENT_REF
+ || code == IMAGPART_EXPR
+ || code == REALPART_EXPR
+ || code == MEM_REF
+ || TREE_CODE_CLASS (code) == tcc_declaration)
+ {
+ /* STORE - Trace SSA_NAME for address if any. Mark it as scalar
+ if not marked already. Do not return, as we need to analyze
+ uses. However, the statement itself is marked of type
+ intermediate, so that it is not marked as loop invariant. */
+ mark_addr_index_stmt (TREE_OPERAND (op0, 0), loop);
+ STMT_ATTR_USE_TYPE (stmt) = stmt_use_type_intermediate;
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Expected store.\n");
+
+ return false;
+ }
+ }
+ }
+
+ /* TODO: PAREN_EXPR and CONVERT_EXPR_CODE_P need to be handled here?? - may
+ not. But still, marking the location. */
+ bool retval = true;
+ tree vec_type = NULL;
+ stmt_type = STMT_ATTR_USE_TYPE (stmt);
+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
+ {
+ tree op = USE_FROM_PTR (use_p);
+ enum tree_code code = TREE_CODE (op);
+
+ if (code == SSA_NAME)
+ {
+ if (!SSA_NAME_IS_DEFAULT_DEF (op))
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (op);
+ retval &= classify_loop_stmt (def_stmt, loop);
+
+ def_type = STMT_ATTR_USE_TYPE (def_stmt);
+
+ if (def_type == stmt_use_type_induction &&
+ stmt_type == stmt_use_type_undef)
+ stmt_type = stmt_use_type_scalar;
+ else if (stmt_type > stmt_use_type_scalar &&
+ stmt_type != def_type)
+ stmt_type = stmt_use_type_intermediate;
+ else
+ stmt_type = def_type;
+
+ vec_type = STMT_ATTR_VECTYPE (def_stmt);
+ if (STMT_ATTR_VECTYPE (stmt) && vec_type
+ && !useless_type_conversion_p (vec_type,
+ STMT_ATTR_VECTYPE (stmt)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: Types of operands do not match.\n");
+ return false;
+ }
+ STMT_ATTR_VECTYPE (stmt) = vec_type;
+ }
+ }
+ else if (CONSTANT_CLASS_P (op) || is_gimple_min_invariant (op))
+ {
+ if (stmt_type <= stmt_use_type_loop_invariant)
+ stmt_type = stmt_use_type_loop_invariant;
+ else
+ stmt_type = stmt_use_type_intermediate;
+ }
+ }
+
+ /* Once all the USEs of stmt are marked, mark the type of this statement. */
+ STMT_ATTR_USE_TYPE (stmt) = stmt_type;
+
+ if (stmt_type == stmt_use_type_undef)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: stmt use type UNDEF at end of processing.\n");
+ return false;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, " classify_loop_stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " : %s\n",
+ attr_name[STMT_ATTR_USE_TYPE (stmt)]);
+ }
+
+ return retval;
+}
+
+/*
+ FUNCTION classify_loop_stmts.
+
+ - Analyze PHI nodes in loop header to identify induction variables. As
+ induction variables have initial definition and a definition within loop,
+ each induction variable ought to be in the PHI nodes of the loop header. Hence,
+ we need not identify them from loop body.
+
+ - If the induction variable has variable step, ILV node creation is not
+ possible (for now), as the arity depends upon the stepsize. It can still be
+ reduction variable, hence decision of vectorization is not taken yet.
+
+ - The variables that are not induction variables, are checked if they are
+ reduction variables - the reduction operation is vectorizable only if the
+ reduction variable does not have any use apart from that in outgoing PHI
+ node.
+
+ - Invoke function classify_loop_stmt () to classify each statement in
+ PROBABLE_ROOTS list recursively so that all the USEs in the statement are
+ processed.
+*/
+
+static bool
+classify_loop_stmts (struct ITER_node *inode, vec<gimple *> *probable_roots)
+{
+ basic_block *bbs;
+ int nbbs;
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ vec<gimple *> worklist = vNULL;
+
+ bbs = get_loop_body (loop);
+ nbbs = loop->num_nodes;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "==== classify_loop_stmts ====\n");
+ }
+
+ /* Mark PHI nodes.  */
+ for (gphi_iterator si = gsi_start_phis (loop->header);
+ !gsi_end_p (si); gsi_next (&si))
+ {
+ gphi *phi = si.phi ();
+ tree access_fn = NULL;
+ tree def = PHI_RESULT (phi);
+ tree init, step;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ }
+
+ if (virtual_operand_p (def))
+ continue;
+
+ access_fn = analyze_scalar_evolution (loop, def);
+ if (access_fn)
+ {
+ STMT_ATTR_ACCESS_FN (phi) = access_fn;
+ }
+ else
+ {
+ worklist.safe_push (phi);
+ continue;
+ }
+
+ if (vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step))
+ {
+ STMT_ATTR_USE_TYPE (phi) = stmt_use_type_induction;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Detected induction.\n");
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+ else
+ worklist.safe_push (phi);
+ }
+#define REDUCTION_COMPLETE 0
+#if REDUCTION_COMPLETE
+ while (worklist.length () > 0)
+ {
+ gimple *phi = worklist.pop ();
+ tree def = PHI_RESULT (phi);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Analyze phi for reduction: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
+ }
+
+ gcc_assert (!virtual_operand_p (def)
+ && STMT_ATTR_USE_TYPE (phi) == stmt_use_type_undef);
+
+ reduc_stmt = vect_simple_reduction ();
+
+ if (reduc_stmt)
+ {
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Unknown def-use cycle pattern.\n");
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+ }
+#else
+ if (worklist.length () > 0)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Reduction not supported yet. Unknown def-use cycle pattern.\n");
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return false;
+ }
+#endif
+
+ worklist = (*probable_roots).copy ();
+ while (worklist.length () > 0)
+ {
+ gimple *stmt = worklist.pop ();
+
+ if (!classify_loop_stmt (stmt, loop))
+ return false;
+ }
+
+ return true;
+}
+
+/* FUNCTION mark_probable_root_nodes.
+
+ The loop has side effects only if
+ - Loop writes to memory location.
+ - The computations within loop are live after the loop.
+
+ This function identifies such nodes.
+
+ mark_probable_root_nodes () -
+ FORALL statements within loop DO
+ - If the result of statement is in outgoing PHI node (live across loop
+ body) mark the statement for probable root p-node.
+
+ - If the result of statement is memory, mark the statement as probable root
+ p-node.
+
+ - If the loop is an inner loop, and any statement uses the induction variable
+ of the outer loop (so that stride > vector_size), do not vectorize.
+*/
+
+static void
+mark_probable_root_nodes (struct ITER_node *inode,
+ vec<gimple *> *probable_roots)
+{
+ int i;
+ gimple *stmt;
+ ssa_op_iter op_iter;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ def_operand_p def_p;
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ int nbbs = loop->num_nodes;
+ basic_block *bbs = get_loop_body (ITER_NODE_LOOP (inode));
+
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ gimple_stmt_iterator si;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " mark_probable_root_nodes: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ }
+
+ if (gimple_has_volatile_ops (stmt))
+ continue;
+
+ if (gimple_vdef (stmt) && !gimple_clobber_p (stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " : Memory def.\n");
+ probable_roots->safe_push (stmt);
+ STMT_ATTR_PROOT (stmt) = true;
+ }
+
+
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
+ {
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
+ {
+ basic_block bb = gimple_bb (USE_STMT (use_p));
+ if (!flow_bb_inside_loop_p (loop, bb))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " : Live beyond loop.\n");
+
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+
+ /* We expect all such uses to be in the loop exit phis
+ (because of loop closed form) */
+ gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
+ gcc_assert (bb == single_exit (loop)->dest);
+
+ probable_roots->safe_push (stmt);
+ STMT_ATTR_PROOT (stmt) = true;
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Function vect_is_simple_use.
+
+ Return TRUE if OPERAND is either constant, invariant, or internal SSA_NAME
+ with simple DEF_STMT. If it is SSA_NAME, return the statement defining the
+ name in DEF_STMT. */
+
+bool
+vect_is_simple_use (tree operand, gimple **def_stmt)
+{
+ *def_stmt = NULL;
+
+ if (CONSTANT_CLASS_P (operand))
+ return true;
+ if (is_gimple_min_invariant (operand))
+ return true;
+ if (TREE_CODE (operand) != SSA_NAME)
+ return false;
+ if (SSA_NAME_IS_DEFAULT_DEF (operand))
+ return true;
+
+ *def_stmt = SSA_NAME_DEF_STMT (operand);
+
+ switch (gimple_code (*def_stmt))
+ {
+ case GIMPLE_PHI:
+ case GIMPLE_ASSIGN:
+ case GIMPLE_CALL:
+ break;
+ default:
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported defining stmt:\n");
+ return false;
+ }
+ return true;
+}
+
+/* Function normalize_base_addr.
+
+ This function normalizes the address used by the STMT within LOOP. The data
+ reference DR holds BASE_ADDRESS, INIT component of first element, and OFFSET
+ of the current element. This function combines all the components of the
+ address and splits it into two components - BASE_ADDRESS and OFFSET, where
+ OFFSET < stride.
+*/
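+/* Editorial worked example with hypothetical values, not part of the
+   original patch: for an access a[i + 3] with 4-byte elements,
+   DR_BASE_ADDRESS is &a, DR_INIT is 12, DR_OFFSET is 0 and DR_STEP is 4.
+   The combined address &a + 12 splits into VAR = &a and CONSTANT = 12;
+   since 12 >= 4, the function returns BASE = &a + 12 and *OFFSET = 0.  */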
+
+static tree
+normalize_base_addr (gimple *stmt, struct loop *loop, tree *offset)
+{
+ tree addr, var, constant, retval;
+
+ addr = size_binop (PLUS_EXPR,
+ fold_convert (ssizetype, DR_BASE_ADDRESS (STMT_ATTR_DR (stmt))),
+ size_binop (PLUS_EXPR,
+ fold_convert (ssizetype, DR_INIT (STMT_ATTR_DR (stmt))),
+ fold_convert (ssizetype, DR_OFFSET (STMT_ATTR_DR (stmt)))));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "normalize_base_addr: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM,
+ addr);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ split_constant_offset (addr, &var, &constant);
+
+ if (tree_fits_shwi_p (constant)
+ && tree_fits_shwi_p (DR_STEP (STMT_ATTR_DR (stmt))))
+ {
+ int c, step;
+ c = tree_to_shwi (constant);
+ step = tree_to_shwi (DR_STEP (STMT_ATTR_DR (stmt)));
+
+ if (c >= step)
+ {
+ *offset = ssize_int (c % step);
+ retval = size_binop (PLUS_EXPR, var, ssize_int (c / step * step));
+ }
+ else
+ {
+ *offset = constant;
+ retval = var;
+ }
+
+ }
+ else
+ {
+ *offset = ssize_int (0);
+ retval = size_binop (PLUS_EXPR, var, constant);
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "\tbase: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM,
+ retval);
+ dump_printf (MSG_NOTE, "\toffset: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM,
+ *offset);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return retval;
+}
+
+/***** Helper functions for prim-tree creation *****/
+
+/* Function init_primop_node.
+
+ This function creates PRIMOP_TREE node and initializes all its fields to 0.
+*/
+
+struct primop_tree *
+init_primop_node (void)
+{
+ static int pid = 0;
+ struct primop_tree *ptree;
+ ptree = (struct primop_tree *) xcalloc (1, sizeof (struct primop_tree));
+
+ PT_PID (ptree) = pid++;
+ PT_NODE_OP (ptree) = 0;
+ PT_ARITY (ptree) = 0;
+ ptree->children = vNULL;
+ PT_PARENT (ptree) = NULL;
+ PT_ITER_COUNT (ptree) = NULL;
+ PT_VEC_SIZE (ptree) = 0;
+ PT_VEC_TYPE (ptree) = NULL;
+ PT_VEC_INST (ptree) = vNULL;
+ PT_TARGET_COST (ptree) = 0;
+ PT_NUM_INSTANCES (ptree) = 0;
+ PT_LOOP_DEPENDENCES (ptree) = vNULL;
+ PT_DEP (ptree) =vNULL;
+ PT_DEPTH (ptree) = 0;
+ PT_ATTR_NO (ptree) = 0;
+ PT_AUX (ptree) = NULL;
+ memset (&ptree->u, 0, sizeof (ptree->u));
+ return ptree;
+}
+
+/* Function populate_prim_node.
+
+ This function returns PRIMOP_TREE node initialized with given information.
+*/
+
+struct primop_tree *
+populate_prim_node (enum primop_code pcode, struct ITER_node *inode,
+ struct primop_tree *parent, gimple *stmt)
+{
+ struct primop_tree *ptree;
+ ptree = init_primop_node ();
+
+ PT_NODE_OP (ptree) = (int) pcode;
+ PT_PARENT (ptree) = parent;
+ PT_ITER_COUNT (ptree) = ITER_NODE_NITERS (inode);
+
+ if (stmt)
+ {
+ PT_VEC_TYPE (ptree) = STMT_ATTR_VECTYPE (stmt);
+ PT_ATTR_NO (ptree) = gimple_uid (stmt);
+ STMT_ATTR_TREE (stmt) = ptree;
+ }
+
+ return ptree;
+}
+
+/* Function exists_primTree_with_memref.
+
+ This function checks if PRIMOP_TREE node corresponding to MEMREF already
+ exists in ITER_node. If yes, return a pointer to that PRIMOP_TREE node;
+ otherwise return NULL. */
+
+struct primop_tree *
+exists_primTree_with_memref (tree base, tree step, bool is_read,
+ struct ITER_node *inode)
+{
+ vec<struct primop_tree *> worklist;
+ worklist = (ITER_NODE_LOOP_BODY (inode)).copy ();
+
+ while (worklist.length () > 0)
+ {
+ primop_tree *ptree = worklist.pop ();
+
+ if (!operand_equal_p (PT_MEMVAL_BASE (ptree), base, 0))
+ continue;
+
+ if (!operand_equal_p (PT_MEMVAL_MULT_IDX (ptree), step, 0))
+ continue;
+
+ if (is_read != PT_MEMVAL_IS_READ (ptree))
+ continue;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " exists_primTree_with_memref: TRUE\n");
+ }
+
+ return ptree;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " exists_primTree_with_memref: FALSE\n");
+ }
+
+ return NULL;
+}
+
+/* Create primtree of type mem_ref - it is only for load/store. */
+
+struct primop_tree *
+create_primTree_memref (tree base, tree step, bool is_read, int num,
+ struct ITER_node *inode, struct primop_tree *parent)
+{
+ struct primop_tree * ptree;
+
+ ptree = populate_prim_node (POP_MEMREF, inode, parent, NULL);
+
+ PT_MEMVAL_BASE (ptree) = unshare_expr (base);
+ PT_MEMVAL_MULT_IDX (ptree) = unshare_expr (step);
+ PT_MEMVAL_IS_READ (ptree) = is_read;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " create_primTree_memref : stride - %d\n ",
+ tree_to_uhwi (step) / num);
+ }
+ return ptree;
+}
+
+/* Function create_primTree_combine.
+
+ Create primtree with PCODE as interleave or concat. STMT is statement for
+ which primtree is being created. */
+struct primop_tree *
+create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts,
+ struct ITER_node *inode, struct primop_tree *parent)
+{
+ struct primop_tree * ptree;
+
+ ptree = populate_prim_node (pcode, inode, parent, stmt);
+ PT_OPERAND_SELECTOR (ptree) = -1;
+ PT_DIVISION (ptree) = parts;
+ PT_VAR_STRIDE (ptree) = NULL;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " create_primTree_combine: parts - %d\n", parts);
+ }
+
+ return ptree;
+}
+
+/* Function create_primTree_partition.
+
+ Create a primtree with PCODE as split or extract. STMT is the statement for
+ which the primtree is being created. PARTS is the number of partitions to be
+ created. SELECTOR is the part being selected. */
+struct primop_tree *
+create_primTree_partition (enum primop_code pcode, gimple *stmt, int parts,
+ int selector, struct ITER_node *inode,
+ struct primop_tree *parent)
+{
+ struct primop_tree * ptree;
+
+ ptree = populate_prim_node (pcode, inode, parent, stmt);
+ PT_OPERAND_SELECTOR (ptree) = selector;
+ PT_DIVISION (ptree) = parts;
+ PT_VAR_STRIDE (ptree) = NULL;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " create_primTree_partition : parts - %d selector - %d\n",
+ parts, selector);
+ }
+
+ return ptree;
+}
+
+/* Function add_child_at_index.
+
+ Attach PCHILD node as the IDX^th child of PTREE. */
+void
+add_child_at_index (struct primop_tree *ptree,
+ struct primop_tree *pchild, int idx)
+{
+ (PT_ARITY (ptree))++;
+ while ((unsigned) idx >= ptree->children.length ())
+ {
+ ptree->children.safe_push (NULL);
+ }
+ PT_CHILD (ptree, idx) = pchild;
+}
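+
+/* Note: when IDX is beyond the current number of children, add_child_at_index
+ pads the children vector with NULL entries; any NULL still present once the
+ tree is fully built is reported by is_ptree_complete as a gap in the memory
+ writes. */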
+
+/* Function get_child_at_index.
+
+ Get the IDX^th child of PTREE. */
+struct primop_tree *
+get_child_at_index (struct primop_tree *ptree, int idx)
+{
+ gcc_assert (idx < PT_ARITY (ptree));
+ return PT_CHILD (ptree, idx);
+}
+
+
+/* Function vectorizable_store.
+
+ This function checks if STMT is a STORE instruction and is vectorizable for
+ the target architecture. If so, it creates the MEMREF node with respect to
+ the loop represented by INODE, for which the vectorization is targeted.
+
+ TODO: Currently we do not handle variable strides, as vectorization is not
+ possible in all cases with a variable stride.
+
+ The variable step can be of 3 types:
+ 1. Monotonically dependent on index
+ 2. Non-monotonically dependent on index.
+ 3. Loop invariant step.
+ a. Case for classic vectorization
+ b. Case for SLP
+
+ 1. Monotonically dependent on index:
+ The address expression under consideration is base + step * index + offset.
+ Even if the induction variable itself is linear, a step that is monotonically
+ dependent on the index gets multiplied by the induction variable, so the
+ expression is no longer linear but quadratic, which prevents loop unrolling.
+
+ 2. Non-monotonically dependent on index:
+ Again, a variable step that is not a monotonic function of the index can
+ cause RAW or WAW conflicts, which would make the optimization incorrect.
+ eg:
+ step = a[i];
+ c[step * i + d] = b[i]
+
+ If the access is on the RHS, there are no conflicts, but the accessed memory
+ locations cannot be determined at compile time, so the optimization is still
+ not possible.
+
+ 3. Loop invariant step:
+ a) Classic auto-vectorization for stride = j.
+ eg:
+ j = op (v1, v2)
+ for (i = 0; i < 2048; i++)
+ a[i] = b[i*j]
+ So we would need to extract every j-th element from array b, for which
+ instruction generation is very difficult as the permute order to use is not
+ known at compile time.
+
+ If the target has an instruction like (un)pack (vreg, reg) which (un)packs
+ vector elements whose index is a multiple of reg to form a new vector, this
+ optimization can be considered.
+
+ b) The only case in which a variable step can work unconditionally is when
+ the step is iteration invariant, for which SLP might work if the SLP factor
+ is the same as the vector size.
+ eg:
+ For vector size = 4,
+ for loop body as follows:
+
+ a[4 * i] = b[var * i]
+ a[4 * i + 1] = b[var * i + 1]
+ a[4 * i + 2] = b[var * i + 2]
+ a[4 * i + 3] = b[var * i + 3]
+
+ the above can be vectorized with a vector load at b[var * i], provided a and
+ b are distinct.
+
+ We looked at the paper "Automatic Vectorization of Interleaved Data
+ Revisited" for the implementation of this part; however, I personally don't
+ find it useful for MIPS. */
+
+static struct primop_tree *
+vectorizable_store (gimple *stmt, struct ITER_node *inode,
+ struct primop_tree *parent)
+{
+ tree src_op, dest_op, base, offset, step;
+ enum tree_code code;
+ tree vec_type, scalar_type, rhs_vectype;
+ gimple *def_stmt;
+ machine_mode vec_mode;
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ struct primop_tree *pnode, *pchild1, *pchild2;
+ int num;
+
+ if (STMT_ATTR_USE_TYPE (stmt) != stmt_use_type_intermediate)
+ return NULL;
+
+ if (!gimple_assign_single_p (stmt))
+ return NULL;
+
+ dest_op = gimple_assign_lhs (stmt);
+ code = TREE_CODE (dest_op);
+
+ if (code != ARRAY_REF
+ && code != BIT_FIELD_REF
+ && code != INDIRECT_REF
+ && code != COMPONENT_REF
+ && code != IMAGPART_EXPR
+ && code != REALPART_EXPR
+ && code != MEM_REF)
+ return NULL;
+
+ if (!STMT_ATTR_DR (stmt))
+ return NULL;
+
+ scalar_type = TREE_TYPE (DR_REF (STMT_ATTR_DR (stmt)));
+ vec_type = get_vectype_for_scalar_type (scalar_type);
+
+ src_op = gimple_assign_rhs1 (stmt);
+
+ if (!vect_is_simple_use (src_op, &def_stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "use not simple.\n");
+ return NULL;
+ }
+
+ rhs_vectype = STMT_ATTR_VECTYPE (def_stmt);
+ if (rhs_vectype && !useless_type_conversion_p (vec_type, rhs_vectype))
+ return NULL;
+
+
+ vec_mode = TYPE_MODE (vec_type);
+ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
+ return NULL;
+
+ /* The dataref for store is available. Analyze it, and create memref for the
+ same. */
+
+ num = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (STMT_ATTR_DR (stmt)))));
+ base = normalize_base_addr (stmt, loop, &offset);
+ step = DR_STEP (STMT_ATTR_DR (stmt));
+
+ if (!tree_fits_uhwi_p (step) || !tree_fits_uhwi_p (offset))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Variable stride or offset.\n");
+ return NULL;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " vectorize_store: ptree creation\n ");
+ }
+
+
+ pnode = exists_primTree_with_memref (base, step, false, inode);
+ if (pnode == NULL)
+ {
+ pnode = create_primTree_memref (base, step, false, num, inode, NULL);
+ ITER_NODE_LOOP_BODY (inode).safe_push (pnode);
+ pchild1 = create_primTree_combine (POP_ILV, stmt,
+ tree_to_uhwi (step) / num, inode, parent);
+ add_child_at_index (pnode, pchild1, 0);
+ }
+ else
+ {
+ pchild1 = get_child_at_index (pnode, 0);
+ }
+ if (def_stmt)
+ {
+ pchild2 = analyze_and_create_ptree (pchild1, def_stmt, inode);
+ if (pchild2 == NULL)
+ return NULL;
+ }
+ else
+ {
+ /* RHS is not SSA name - it is either invariant or constant. Create
+ appropriate primtree node accordingly. */
+ /* TODO - Create POP_CONST or POP_INV node - which will create vector
+ using VEC_INIT at later stages. */
+ return NULL;
+ }
+
+ if (tree_to_uhwi (offset) / num >= PT_DIVISION (pchild1))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: stride < offset\n");
+ }
+ return NULL;
+ }
+
+ add_child_at_index (pchild1, pchild2, tree_to_uhwi (offset) / num);
+
+ return pnode;
+}
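+
+/* Illustrative sketch (not part of the algorithm above): for a store such as
+ a[2 * i + 1] = x_1, with element size NUM and DR_STEP of 2 * NUM, the code
+ above creates (or reuses) the root MEMREF node for "a", gives it a single
+ POP_ILV child with PT_DIVISION == 2, and attaches the p-tree built for the
+ definition of x_1 as child number offset / NUM == 1 of that POP_ILV node.
+ The array and SSA names here are hypothetical. */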
+
+/* Function vectorizable_load.
+
+ This function checks if STMT is a LOAD instruction and is vectorizable for
+ the target architecture. If so, it creates the MEMREF node with respect to
+ the loop represented by INODE, for which the vectorization is targeted. */
+
+static struct primop_tree *
+vectorizable_load (gimple *stmt, struct ITER_node *inode,
+ struct primop_tree *parent)
+{
+ tree src_op, dest_op, step, base, offset;
+ enum tree_code code;
+ tree vec_type, scalar_type;
+ machine_mode vec_mode;
+ struct loop * loop = ITER_NODE_LOOP (inode);
+ struct primop_tree *pnode, *pchild1;
+ int num;
+
+
+ if (STMT_ATTR_USE_TYPE (stmt) != stmt_use_type_intermediate)
+ return NULL;
+
+ if (!gimple_assign_single_p (stmt))
+ return NULL;
+
+ dest_op = gimple_assign_lhs (stmt);
+ src_op = gimple_assign_rhs1 (stmt);
+ code = TREE_CODE (src_op);
+
+ if (code != ARRAY_REF
+ && code != BIT_FIELD_REF
+ && code != INDIRECT_REF
+ && code != COMPONENT_REF
+ && code != IMAGPART_EXPR
+ && code != REALPART_EXPR
+ && code != MEM_REF)
+ return NULL;
+
+ if (!STMT_ATTR_DR (stmt))
+ return NULL;
+
+
+ scalar_type = TREE_TYPE (DR_REF (STMT_ATTR_DR (stmt)));
+ vec_type = get_vectype_for_scalar_type (scalar_type);
+
+ vec_mode = TYPE_MODE (vec_type);
+ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
+ return NULL;
+
+ /* The dataref for load is available. Analyze it, and create memref for the
+ same. */
+
+ num = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (STMT_ATTR_DR (stmt)))));
+ base = normalize_base_addr (stmt, loop, &offset);
+ step = DR_STEP (STMT_ATTR_DR (stmt));
+
+ if (!tree_fits_uhwi_p (step) || !tree_fits_uhwi_p (offset))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Variable stride or offset.\n");
+ return NULL;
+ }
+
+ gcc_assert (parent);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " vectorize_load: ptree creation\n ");
+ }
+
+ pnode = create_primTree_partition (POP_EXTR, stmt,
+ tree_to_uhwi (step) / num,
+ tree_to_uhwi (offset) / num, inode, parent);
+ pchild1 = create_primTree_memref (base, step, true, num, inode, pnode);
+ add_child_at_index (pnode, pchild1, 0);
+ return pnode;
+}
+
+/* FUNCTION analyze_and_create_ptree ().
+
+ - If the result is stored to memory, analyze the address - extract the
+ <base, idx, mult_idx, offset> tuple from the gimple chains
+
+ *_4 = op (v1, v2, ...)
+
+ - If <offset> >= <mult_idx> : Cannot vectorize currently. (Optimization
+ TODO: create new induction variable ind_var and adjust the offset to
+ <offset> % <mult_idx>, and ind_var = idx + <offset>/<mult_idx>).
+ - Else
+ 1. Create root node with mem_ref <base, idx> (if one does not already
+ exist; if it exists, go to step 3).
+ 2. Attach child - ILV node with arity = <mult_idx> to root node.
+ 3. Create a child to ILV at index <offset>:
+ - analyze_and_create_ptree (op (v1, v2, ...)).
+
+ - If LHS is SSA_NAME, look at the definition of the SSA_NAME.
+ - If RHS of DEF is op (v1, v2, ...) and <op> is vectorizable, create
+ c-node with arity = arity (op), and attach the children:
+ - analyze_and_create_ptree (v1)
+ - analyze_and_create_ptree (v2)
+ - :
+ - :
+
+ - If the RHS accesses memory, analyze the address and create a mem_ref node
+ as above:
+ 1. Create EXTR node with arity = <mult_idx> and extr_idx = <offset>
+ 2. Create a child node with mem_ref <base, idx>
+
+ - If the result is an outgoing PHI node, there should be a single use node
+ - Access definition of the use node.
+ - If op on LHS of definition is collapsible, generate a
+ COLPS <op, PHI name> in epilogue.
+ - Create root node with memref<PHI name, loop ind_var>
+ - Create ITER node with single child - analyze_and_create (<second_opd>)
+*/
+
+struct primop_tree *
+analyze_and_create_ptree (struct primop_tree *parent, gimple *stmt,
+ struct ITER_node *inode)
+{
+ struct primop_tree *ptree;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " analyze_and_create_ptree: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ }
+
+ /* If the statement is not within the loop, create VEC_INIT node and
+ return. */
+
+ if (!flow_bb_inside_loop_p (ITER_NODE_LOOP (inode), gimple_bb (stmt)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: Loop invariant not handled.\n");
+ }
+ return NULL;
+ }
+
+ if ((ptree = vectorizable_store (stmt, inode, parent)) == NULL
+ && (ptree = vectorizable_load (stmt, inode, parent)) == NULL
+ /* && (ptree = vectorizable_assign (stmt, inode, parent)) == NULL
+ && (ptree = vectorizable_reduction (stmt, inode, parent)) == NULL
+ && (ptree = vectorizable_arith (stmt, inode, parent)) == NULL */
+ )
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: Target does not support instruction.\n");
+ }
+ return ptree;
+}
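+
+/* Worked example (illustration only): for a loop whose body is
+
+ _1 = S[2 * i + 1];
+ D[2 * i] = _1;
+
+ vectorizable_store creates MEMREF(D) with a single POP_ILV child of
+ PT_DIVISION == 2, and the recursive call on the definition of _1 reaches
+ vectorizable_load, which creates a POP_EXTR node with PT_DIVISION == 2 and
+ PT_OPERAND_SELECTOR == 1 whose only child is MEMREF(S). That EXTR node is
+ then attached as child 0 of the ILV node. D and S are hypothetical arrays
+ of a vectorizable element type. */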
+
+/* Function is_ptree_complete.
+
+ The function checks if the generated ptree covers every element of each
+ destination vector: each root MEMREF node must have a single interleave
+ child, no index of which is written more than once or left unwritten. */
+
+static bool
+is_ptree_complete (struct ITER_node *inode)
+{
+ vec<struct primop_tree *> worklist;
+ vec<struct primop_tree *> chlist;
+
+ worklist = (ITER_NODE_LOOP_BODY (inode)).copy ();
+
+ while (worklist.length () > 0)
+ {
+ primop_tree *ptree = worklist.pop ();
+ if (PT_ARITY (ptree) == 0)
+ continue;
+
+ gcc_assert (ptree->children.length () == 1);
+
+ ptree = get_child_at_index (ptree, 0);
+ gcc_assert (PT_NODE_OP (ptree) == POP_ILV);
+ if (PT_ARITY (ptree) > ptree->children.length ())
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: Multiple writes to same index.\n");
+ }
+ return false;
+ }
+
+ chlist = ptree->children.copy ();
+
+ while (chlist.length () > 0)
+ {
+ if (chlist.pop () == NULL)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Not vectorized: Void in memory writes.\n");
+ }
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/* Function create_ptree.
+
+ This function is invoked if all the data reference related checks are
+ successful and autovectorization is possible. This function identifies
+ statements within the LOOP which have side-effects, as PROBABLE_ROOT_NODES.
+ It then classifies all the STMTs within the LOOP. Once successful, it
+ invokes analyze_and_create_ptree, which actually creates the permute order
+ tree. */
+
+static bool
+create_ptree (struct ITER_node *inode)
+{
+ auto_vec<gimple *, 64> worklist;
+ bool is_ok;
+
+ mark_probable_root_nodes (inode, &worklist);
+
+ if (worklist.length () == 0)
+ {
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "Not vectorizable: no probable root nodes.\n");
+ return false;
+ }
+
+ is_ok = classify_loop_stmts (inode, &worklist);
+ if (! is_ok)
+ {
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "Not vectorizable: Classification failed.\n");
+ return false;
+ }
+
+ while (worklist.length () > 0)
+ {
+ is_ok = (analyze_and_create_ptree (NULL, worklist.pop (), inode) != NULL);
+ if (! is_ok)
+ {
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "Not vectorized: p-tree creation failed.\n");
+ return false;
+ }
+ }
+
+ ITER_NODE_NITERS (inode) = integer_one_node;
+
+ if (is_ptree_complete (inode))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_NOTE,
+ "Vectorized: ptree complete.\n");
+ }
+ return true;
+ }
+ else
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "Not vectorized: ptree incomplete.\n");
+ }
+ return false;
+ }
+}
+
+/* Function vect_analyze_loop_with_prim_tree_2.
+
+ Perform various analysis on the loop LOOP, and record the information in
+ ITER_node structure. */
+
+static bool
+vect_analyze_loop_with_prim_tree_2 (struct ITER_node *inode)
+{
+ bool ok;
+ int max_vf = MAX_VECTORIZATION_FACTOR;
+ int min_vf = 2;
+ unsigned int n_stmts = 0;
+ auto_vec<loop_p, 64> loop_nest;
+
+ /* Find all data references in the loop (which correspond to vdefs/vuses)
+ and analyze their evolution in the loop. */
+
+ struct loop *loop = ITER_NODE_LOOP (inode);
+ basic_block *bbs = get_loop_body (ITER_NODE_LOOP (inode));
+ if (!find_loop_nest (loop, &loop_nest))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: loop contains function calls"
+ " or data references that cannot be analyzed\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < loop->num_nodes; i++)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt))
+ continue;
+ ++n_stmts;
+ if (!find_data_references_in_stmt (loop, stmt,
+ &ITER_NODE_DATA_REFS (inode)))
+ {
+ if (is_gimple_call (stmt) && loop->safelen)
+ {
+ tree fndecl = gimple_call_fndecl (stmt), op;
+ if (fndecl != NULL_TREE)
+ {
+ cgraph_node *node = cgraph_node::get (fndecl);
+ if (node != NULL && node->simd_clones != NULL)
+ {
+ unsigned int j, n = gimple_call_num_args (stmt);
+ for (j = 0; j < n; j++)
+ {
+ op = gimple_call_arg (stmt, j);
+ if (DECL_P (op)
+ || (REFERENCE_CLASS_P (op)
+ && get_base_address (op)))
+ break;
+ }
+ op = gimple_call_lhs (stmt);
+ // Ignore #pragma omp declare simd functions
+ // if they don't have data references in the
+ // call stmt itself.
+ if (j == n
+ && !(op
+ && (DECL_P (op)
+ || (REFERENCE_CLASS_P (op)
+ && get_base_address (op)))))
+ continue;
+ }
+ }
+ }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: loop contains function "
+ "calls or data references that cannot "
+ "be analyzed\n");
+ return false;
+ }
+ }
+
+ if (!vect_analyze_datarefs (inode))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data references.\n");
+ return false;
+ }
+
+ ok = vect_analyze_dataref_accesses (inode);
+ if (!ok)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data access.\n");
+ return false;
+ }
+
+ ok = vect_analyze_dataref_dependences (inode, loop_nest);
+ if (!ok
+ || max_vf < min_vf)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data dependence.\n");
+ return false;
+ }
+
+ return create_ptree (inode);
+}
+
+/* Function pretty_print_gimple_vec.
+
+ Function to pretty print all the gimple statements in LIST. */
+
+void
+pretty_print_gimple_vec (pretty_printer *pp, vec<gimple *> list)
+{
+
+ vec<gimple *> worklist;
+
+ worklist = list.copy ();
+
+ while (worklist.length () > 0)
+ {
+ pp_newline_and_indent (pp, 2);
+ pp_gimple_stmt_1 (pp, worklist.pop (), 2, TDF_SLIM);
+ pp_printf (pp, ";");
+ }
+}
+
+#define DEFTREECODE(SYM, NAME, TYPE, LEN) NAME,
+#define END_OF_BASE_TREE_CODES "@dummy",
+
+static const char *const tree_code_name[] = {
+#include "all-tree.def"
+"ILV",
+"CONCAT",
+"EXTR",
+"SPLT",
+"COLLAPSE",
+"MEMREF",
+"CONST",
+"INVAR",
+"ITER"
+};
+
+#undef DEFTREECODE
+#undef END_OF_BASE_TREE_CODES
+
+/* Function pp_primop_tree.
+
+ Function to pretty print primop_tree PTREE. */
+
+void
+pp_primop_tree (pretty_printer *pp, struct primop_tree * ptree)
+{
+ pp_newline_and_indent (pp, 0);
+ pp_indent (pp);
+ pp_printf (pp,"node [shape=record];");
+ pp_newline_and_indent (pp, 0);
+ pp_indent (pp);
+ pp_printf (pp, "%d [label=\"{", PT_PID (ptree));
+ dump_generic_node (pp, PT_ITER_COUNT (ptree), 0, TDF_SLIM, false);
+ pp_printf (pp, "}|{%s", tree_code_name[PT_NODE_OP (ptree)]);
+
+ switch (PT_NODE_OP (ptree))
+ {
+ case POP_ILV:
+ case POP_CONCAT:
+ pp_printf (pp, "|%d", PT_DIVISION (ptree));
+ break;
+ case POP_EXTR:
+ case POP_SPLT:
+ pp_printf (pp, "|div:%d", PT_DIVISION (ptree));
+ pp_printf (pp, "|sel:%d", PT_OPERAND_SELECTOR (ptree));
+ break;
+ case POP_COLLAPSE:
+ break;
+ case POP_MEMREF:
+ pp_printf (pp, "|");
+ dump_generic_node (pp, PT_MEMVAL_BASE (ptree), 0, TDF_SLIM, false);
+ pp_printf (pp, "|stride:");
+ dump_generic_node (pp, PT_MEMVAL_MULT_IDX (ptree), 0, TDF_SLIM, false);
+ break;
+ case POP_CONST:
+ break;
+ case POP_INV:
+ break;
+ case POP_ITER:
+ break;
+ default:
+ break;
+ }
+
+ pp_printf (pp, "}|{%d}\"];", PT_ARITY (ptree));
+
+ if (PT_NODE_OP (ptree) == POP_ITER)
+ {
+ pretty_print_iter_node (pp, PT_INODE (ptree), 2);
+ return;
+ }
+
+ if (PT_ARITY (ptree) != 0)
+ {
+ pretty_print_ptree_vec (pp, ptree->children);
+
+ vec<struct primop_tree *> worklist;
+
+ worklist = ptree->children.copy ();
+
+ while (worklist.length () > 0)
+ {
+ struct primop_tree *child = worklist.pop ();
+ pp_newline_and_indent (pp, 0);
+ pp_indent (pp);
+ pp_printf (pp, "%d -> %d;", PT_PID (ptree), PT_PID (child));
+ }
+ }
+
+}
+
+/* Function pretty_print_ptree_vec.
+
+ Function to pretty print the list LIST of primop nodes. */
+
+void
+pretty_print_ptree_vec (pretty_printer *pp, vec<struct primop_tree *> list)
+{
+ vec<struct primop_tree *> worklist;
+
+ worklist = list.copy ();
+
+ while (worklist.length () > 0)
+ {
+ pp_newline_and_indent (pp, 0);
+ pp_primop_tree (pp, worklist.pop ());
+ }
+}
+
+/* Function pretty_print_iter_node.
+
+ Pretty print INODE in .dot format at indentation DEPTH. */
+
+void
+pretty_print_iter_node (pretty_printer *pp, struct ITER_node *inode, int depth)
+{
+ pp_indentation (pp) += depth;
+ pp_indent (pp);
+ pp_printf (pp, "subgraph cluster_iter_node_%d {\n",
+ ITER_NODE_LOOP (inode)->num);
+ pp_indentation (pp) += 2;
+ pp_indent (pp);
+ pp_printf (pp, "label=\"LOOP #%d. NUM_ITER:", ITER_NODE_LOOP (inode)->num);
+ dump_generic_node (pp, ITER_NODE_NITERS (inode), 0, TDF_SLIM, false);
+ pp_printf (pp, "\";\n");
+
+ pp_indent (pp);
+ pp_printf (pp, "subgraph cluster_iter_node_%d_pro {\n",
+ ITER_NODE_LOOP (inode)->num);
+ pp_indentation (pp) += 2;
+ pp_indent (pp);
+ pp_printf (pp, "label=\"PROLOGUE\";\n");
+ pretty_print_gimple_vec (pp, ITER_NODE_PROLOGUE (inode));
+ pp_indentation (pp) -= 2;
+ pp_indent (pp);
+ pp_printf (pp, "}\n");
+
+ pp_printf (pp, "subgraph cluster_iter_node_%d_epi {\n",
+ ITER_NODE_LOOP (inode)->num);
+ pp_indentation (pp) += 2;
+ pp_indent (pp);
+ pp_printf (pp, "label=\"EPILOGUE\";\n");
+ pretty_print_gimple_vec (pp, ITER_NODE_EPILOGUE (inode));
+ pp_indentation (pp) -= 2;
+ pp_indent (pp);
+ pp_printf (pp, "}\n");
+
+ pp_indentation (pp) += 2;
+ pp_printf (pp, "subgraph cluster_iter_node_%d_body {\n",
+ ITER_NODE_LOOP (inode)->num);
+ pp_indentation (pp) += 2;
+ pp_indent (pp);
+ pp_printf (pp, "label=\"LOOP\\nBODY\";\n");
+ pretty_print_ptree_vec (pp, ITER_NODE_LOOP_BODY (inode));
+ pp_indentation (pp) -= 2;
+ pp_indent (pp);
+ pp_printf (pp, "}\n");
+ pp_indentation (pp) -= 2;
+ pp_indent (pp);
+ pp_printf (pp, "}\n");
+}
+
+/* Function dump_iter_node.
+
+ Dump the permute order trees represented by INODE in .dot format. */
+
+void
+dump_iter_node (struct ITER_node *inode, FILE *fp)
+{
+ pretty_printer pp;
+
+ pp.buffer->stream = fp;
+ pp_printf (&pp, "digraph uniopDG {\n");
+ pp_printf (&pp, " " "color=blue;" "\n");
+ pp_printf (&pp, " " "style=bold;" "\n");
+ pp_printf (&pp, " " "compound=true;" "\n");
+
+ pretty_print_iter_node (&pp, inode, 4);
+
+ pp_printf (&pp, "}\n");
+ pp_flush (&pp);
+}
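+
+/* For reference, the emitted graph has roughly the following shape (a
+ hand-written sketch, not actual pass output; the loop number, node ids and
+ the memref shown are hypothetical):
+
+ digraph uniopDG {
+ subgraph cluster_iter_node_1 {
+ label="LOOP #1. NUM_ITER:1";
+ node [shape=record];
+ 0 [label="{1}|{MEMREF|a|stride:8}|{1}"];
+ 1 [label="{1}|{ILV|2}|{2}"];
+ 0 -> 1;
+ }
+ }
+
+ Each node label follows pp_primop_tree: iteration count, operation with its
+ attributes, and arity. */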
+
+/* Function vectorize_loops_using_uniop.
+
+ Entry point to autovectorization using unified representation:
+ For each loop D:
+ - analyze the loop, and create p-tree if loop is vectorizable.
+ - generate vectorized code for the corresponding p-tree. */
+
+unsigned
+vectorize_loops_using_uniop (void)
+{
+ unsigned int vect_loops_num;
+ struct loop *loop;
+ bool any_ifcvt_loops = false;
+ unsigned int num_vectorized_loops = 0;
+ unsigned int ret = 0;
+ unsigned int i;
+ vect_loops_num = number_of_loops (cfun);
+
+ /* Bail out if there are no loops. */
+ if (vect_loops_num <= 1)
+ return 0;
+
+ if (cfun->has_simduid_loops)
+ return 0;
+
+ //iter_node = NULL;
+
+ init_stmt_attr_vec ();
+
+ FOR_EACH_LOOP (loop, 0)
+ if (loop->dont_vectorize)
+ any_ifcvt_loops = true;
+ else if (loop->simduid)
+ continue;
+ else if ((flag_tree_loop_vectorize
+ && optimize_loop_nest_for_speed_p (loop))
+ || loop->force_vectorize)
+ {
+ /* Vectorization should be possible. Let us find out whether all statements
+ are vectorizable and, if so, create the p-tree. */
+ struct ITER_node * tmp_iter_node;
+
+ vect_location = find_loop_location (loop);
+ if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
+ && dump_enabled_p ())
+ dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
+ LOCATION_FILE (vect_location),
+ LOCATION_LINE (vect_location));
+
+ tmp_iter_node = vect_populate_iter_node_from_loop (loop);
+
+ if (!tmp_iter_node)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad loop form.\n");
+ continue;
+ }
+
+ if (!vect_analyze_loop_with_prim_tree_2 (tmp_iter_node))
+ {
+ destroy_iter_node_info (tmp_iter_node);
+ loop->aux = NULL;
+ continue;
+ }
+
+ loop->aux = tmp_iter_node;
+
+ if (!dbg_cnt (vect_loop))
+ {
+ any_ifcvt_loops = true;
+ break;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_NOTE, "\nLoop is vectorizable.\n");
+ if (dump_file)
+ dump_iter_node (tmp_iter_node, dump_file);
+ if (alt_dump_file)
+ dump_iter_node (tmp_iter_node, alt_dump_file);
+ }
+
+ gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
+ /* If the loop is vectorized, set uid of stmts within scalar loop to
+ 0. This change is needed if transform phase uses this loop info. */
+ /*if (loop_vectorized_call)
+ set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);*/
+
+ /* TODO: Insert call to transformation entry point. */
+
+ num_vectorized_loops++;
+ /* Now that the loop has been vectorized, allow it to be unrolled
+ etc. */
+ loop->force_vectorize = false;
+ if (loop_vectorized_call)
+ {
+ fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
+ ret |= TODO_cleanup_cfg;
+ }
+ }
+
+
+ vect_location = UNKNOWN_LOCATION;
+
+ statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vectorized %u loops in function.\n",
+ num_vectorized_loops);
+
+
+ if (any_ifcvt_loops)
+ for (i = 1; i < vect_loops_num; i++)
+ {
+ loop = get_loop (cfun, i);
+ if (loop && loop->dont_vectorize)
+ {
+ gimple *g = vect_loop_vectorized_call (loop);
+ if (g)
+ {
+ fold_loop_vectorized_call (g, boolean_false_node);
+ ret |= TODO_cleanup_cfg;
+ }
+ }
+ }
+
+ for (i = 1; i < vect_loops_num; i++)
+ {
+ struct ITER_node *inode;
+
+ loop = get_loop (cfun, i);
+ if (!loop)
+ continue;
+ inode = (struct ITER_node *) loop->aux;
+ if (inode)
+ destroy_iter_node_info (inode);
+ loop->aux = NULL;
+ }
+
+ free_stmt_attr_vec ();
+
+ if (num_vectorized_loops > 0)
+ {
+ /* If we vectorized any loop only virtual SSA form needs to be updated.
+ ??? Also while we try hard to update loop-closed SSA form we fail
+ to properly do this in some corner-cases (see PR56286). */
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+ return TODO_cleanup_cfg;
+ }
+
+ return ret;
+}
+#endif
diff --git a/gcc/tree-vect-unified.h b/gcc/tree-vect-unified.h
new file mode 100644
index 00000000000..95cb8a4bacc
--- /dev/null
+++ b/gcc/tree-vect-unified.h
@@ -0,0 +1,291 @@
+/* Loop Vectorization using unified representation for permute instructions.
+ Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Contributed by Sameera Deshpande <sameera.deshpande@imgtec.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Prim-op ITER : ITER node has 3 main objectives in loop representation using
+ primitive-ops for reordering -
+ 1) Enlist all p-trees representing live destination vectors written within
+ the loop.
+ 2) Record preparatory operations, loop transformations and cleanup operations
+ needed to enable vectorization.
+ 3) Record number of iterations once vector-size reduction is applied on
+ primitive trees. */
+
+struct ITER_node {
+ /* Preamble */
+
+ /* Loop for which the ITER node is created. */
+ struct loop *loop;
+
+ /* Number of iterations - At the beginning, it is 1. After vector-size
+ reduction operation, it has appropriate value. */
+ tree num_iter;
+
+ /* Preparatory statements to be emitted before vectorized loop body. */
+ vec<gimple *> prep_stmts;
+
+ /* If loop peeling is needed - before/after depending upon this variable. */
+ int loop_peel_needed;
+
+ /* Actual loop body */
+
+ /* This vector lists the statements which are live beyond the loop. Each
+ iteration performs all the operations in this vector in the same order. In
+ most cases, this vector holds perm-trees responsible for updating the
+ destination vectors. */
+ vec<struct primop_tree *> stmts;
+
+ /* Epilogue */
+
+ /* When we have grouped data accesses with gaps, we may introduce invalid
+ memory accesses. We peel the last iteration of the loop to prevent
+ this. */
+ bool peeling_for_gaps;
+
+ /* When the number of iterations is not a multiple of the vector size
+ we need to peel off iterations at the end to form an epilogue loop. */
+ bool peeling_for_niter;
+
+ /* Concluding operations to be performed after loop body - e.g: collapse op
+ on temporary vectors. */
+ vec<gimple *> finish_stmts;
+
+ /* All data references within the loop. */
+ vec<data_reference_p> datarefs;
+
+ /* All data dependences within the loop. */
+ vec<ddr_p> ddrs;
+};
+
+#define ITER_NODE_NITERS(x) (x)->num_iter
+#define ITER_NODE_NITERS_KNOWN_P(x) \
+ (tree_fits_shwi_p ((x)->num_iter) && tree_to_shwi ((x)->num_iter) > 0)
+#define ITER_NODE_LOOP(x) (x)->loop
+#define ITER_NODE_PROLOGUE(x) (x)->prep_stmts
+#define ITER_NODE_LOOP_BODY(x) (x)->stmts
+#define ITER_NODE_EPILOGUE(x) (x)->finish_stmts
+#define ITER_NODE_LOOP_PEEL_NEEDED(x) (x)->loop_peel_needed
+#define ITER_NODE_PEELING_FOR_GAPS(x) (x)->peeling_for_gaps
+#define ITER_NODE_PEELING_FOR_NITER(x) (x)->peeling_for_niter
+#define ITER_NODE_DATA_REFS(x) (x)->datarefs
+#define ITER_NODE_DATA_DEPS(x) (x)->ddrs
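+
+/* Usage sketch: the analysis phase accesses ITER_node only through the macros
+ above - e.g. vect_analyze_loop_with_prim_tree_2 fills ITER_NODE_DATA_REFS,
+ the store handling pushes root MEMREF trees onto ITER_NODE_LOOP_BODY, and
+ create_ptree resets ITER_NODE_NITERS to integer_one_node before the
+ vector-size reduction step. */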
+
+enum stmt_use_type {
+ stmt_use_type_undef,
+ stmt_use_type_scalar,
+ stmt_use_type_loop_invariant,
+ stmt_use_type_induction,
+ stmt_use_type_reduction,
+ stmt_use_type_intermediate,
+ stmt_use_type_complex,
+ stmt_use_type_loop_exit_ctrl
+};
+
+struct stmt_attr {
+ enum stmt_vec_info_type type;
+ enum stmt_use_type use_type;
+ tree access_fn;
+ struct primop_tree *ptree;
+ bool probable_root;
+ struct data_reference *dr;
+ tree vectype;
+};
+
+#define STMT_ATTR_USE_TYPE(s) (get_stmt_attr (s))->use_type
+#define STMT_ATTR_ACCESS_FN(s) (get_stmt_attr (s))->access_fn
+#define STMT_ATTR_TREE(s) (get_stmt_attr (s))->ptree
+#define STMT_ATTR_PROOT(s) (get_stmt_attr (s))->probable_root
+#define STMT_ATTR_DR(s) (get_stmt_attr (s))->dr
+#define STMT_ATTR_VECTYPE(s) (get_stmt_attr (s))->vectype
+
+vec<struct stmt_attr *> stmt_attr_vec;
+
+void
+init_stmt_attr_vec (void)
+{
+ gcc_assert (!stmt_attr_vec.exists ());
+ stmt_attr_vec.create (50);
+}
+
+void
+free_stmt_attr_vec (void)
+{
+ gcc_assert (stmt_attr_vec.exists ());
+ stmt_attr_vec.release ();
+}
+
+inline void
+set_stmt_attr (gimple *stmt, struct stmt_attr *info)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ {
+ gcc_checking_assert (info);
+ uid = stmt_attr_vec.length () + 1;
+ gimple_set_uid (stmt, uid);
+ stmt_attr_vec.safe_push (info);
+ }
+ else
+ {
+ gcc_checking_assert (info == NULL);
+ stmt_attr_vec[uid - 1] = info;
+ }
+}
+
+inline struct stmt_attr *
+get_stmt_attr (gimple *stmt)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ return NULL;
+
+ return stmt_attr_vec[uid - 1];
+}
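+
+/* Usage sketch: the pass calls init_stmt_attr_vec () once, registers a freshly
+ allocated struct stmt_attr for each statement with set_stmt_attr (stmt, attr)
+ - which assigns gimple_uid (stmt) == stmt_attr_vec.length () - and the
+ STMT_ATTR_* macros then retrieve it through get_stmt_attr in constant time;
+ free_stmt_attr_vec () releases the table at the end of the pass. */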
+
+/* PRIMOP_TREE : Memory accesses within a loop have a definite repetitive
+ pattern which can be captured using primitive permute operators, which in
+ turn determine the desired permute order for the vector computations. The
+ PRIMOP_TREE is an AST which records all computations and permutations
+ required to store the destination vector into contiguous memory at the end
+ of all iterations of the loop. */
+struct primop_tree {
+ /* Unique ptree ID for dump. */
+ int pid;
+
+ /* stmt_attr number. */
+ int attr_no;
+
+ /* Operation. */
+ int node_op;
+
+ /* Arity of the operation. */
+ int arity;
+
+ /* List of children to this node. */
+ vec<struct primop_tree *> children;
+
+ /* Parent node. */
+ struct primop_tree *parent;
+
+ /* Number of iterations - At the beginning, it is loop_count. After vec-size
+ reduction operation, it is changed to vectorization factor for the
+ operation. */
+ tree iter_count;
+
+ /* Actual vector size supported by target. */
+ int vec_size;
+
+ /* The vector type which should be used to vectorize this node. */
+ tree vec_type;
+
+ /* Set of vector instructions to represent this node. */
+ vec<gimple *> vec_inst;
+
+ /* Instruction cost for vec_size. */
+ int target_cost;
+
+ /* Number of instances of vec_inst needed for iter_count. */
+ int instances;
+
+ /* If the tree is loop-variant, the loops on which this tree depends. */
+ /* TODO: I am not very sure if we need all the ITERs or just the innermost
+ affecting loop. However, for now, keep the list of all loops from
+ innermost to outermost. */
+ vec<struct ITER_node *> loop_dependences;
+
+ /* Dependence links if any to other statements. */
+ vec<ddr_p> dependences;
+
+ /* Depth within sub-tree of same type. */
+ int depth;
+
+ union {
+ /* In case of a c-node, the gimple statement corresponding to the c-op. */
+ gimple * gimple_for_comp;
+
+ /* In case of permute-node, some permute-specific attributes. */
+ struct perm_node {
+ /* The component to be selected for EXTRACT or SPLIT op. */
+ int opd_selector;
+
+ /* Number of partitions for permute op. In case of variable mult-idx,
+ this gives ARITY for ILV and CONCAT as well. For constant mult-idx,
+ ARITY = DIVISION. */
+ int division;
+ tree *var_stride;
+ } val;
+
+ /* ITER-node representing inner loop, if any. */
+ struct ITER_node * inode;
+
+ /* mem_ref without offset information. */
+ struct mem_ref {
+ tree base;
+ tree mult_idx;
+ bool is_read;
+ } memval;
+ } u;
+ void *aux;
+};
+
+#define PT_PID(x) (x)->pid
+#define PT_NODE_OP(x) (x)->node_op
+#define PT_ATTR_NO(x) (x)->attr_no
+#define PT_ARITY(x) (x)->arity
+#define PT_CHILD(x,i) (x)->children[i]
+#define PT_PARENT(x) (x)->parent
+#define PT_ITER_COUNT(x) (x)->iter_count
+#define PT_VEC_SIZE(x) (x)->vec_size
+#define PT_VEC_TYPE(x) (x)->vec_type
+#define PT_VEC_INST(x) (x)->vec_inst
+#define PT_TARGET_COST(x) (x)->target_cost
+#define PT_NUM_INSTANCES(x) (x)->instances
+#define PT_LOOP_DEPENDENCES(x) (x)->loop_dependences
+#define PT_DEP(x) (x)->dependences
+#define PT_DEPTH(x) (x)->depth
+#define PT_COMPUTE_STMT(x) (x)->u.gimple_for_comp
+#define PT_OPERAND_SELECTOR(x) (x)->u.val.opd_selector
+#define PT_DIVISION(x) (x)->u.val.division
+#define PT_VAR_STRIDE(x) (x)->u.val.var_stride
+#define PT_INODE(x) (x)->u.inode
+#define PT_MEMVAL_BASE(x) (x)->u.memval.base
+#define PT_MEMVAL_MULT_IDX(x) (x)->u.memval.mult_idx
+#define PT_MEMVAL_IS_READ(x) (x)->u.memval.is_read
+#define PT_AUX(x) (x)->aux
+
+//struct ITER_node *iter_node;
+
+extern unsigned int vectorize_loops_using_uniop (void);
+extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *,
+ gimple *, struct ITER_node *);
+extern void pretty_print_ptree_vec (pretty_printer *,
+ vec<struct primop_tree*>);
+extern void pretty_print_iter_node (pretty_printer *, struct ITER_node *, int);
+
+enum primop_code {
+ POP_ILV=MAX_TREE_CODES,
+ POP_CONCAT,
+ POP_EXTR,
+ POP_SPLT,
+ POP_COLLAPSE,
+ POP_MEMREF,
+ POP_CONST,
+ POP_INV,
+ POP_ITER};
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 9fbd1836ecb..a8a5760136b 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -399,7 +399,7 @@ vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
/* If LOOP has been versioned during ifcvt, return the internal call
guarding it. */
-static gimple *
+gimple *
vect_loop_vectorized_call (struct loop *loop)
{
basic_block bb = loop_preheader_edge (loop)->src;
@@ -435,7 +435,7 @@ vect_loop_vectorized_call (struct loop *loop)
/* Fold LOOP_VECTORIZED internal call G to VALUE and
update any immediate uses of it's LHS. */
-static void
+void
fold_loop_vectorized_call (gimple *g, tree value)
{
tree lhs = gimple_call_lhs (g);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2cfb72a6fa3..43c187be53d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -798,7 +798,7 @@ set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
/* Return the earlier statement between STMT1 and STMT2. */
-static inline gimple *
+inline gimple *
get_earlier_stmt (gimple *stmt1, gimple *stmt2)
{
unsigned int uid1, uid2;
@@ -1099,6 +1099,9 @@ extern bool vect_analyze_data_ref_accesses (vec_info *);
extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_check_gather_scatter (gimple *, loop_vec_info,
gather_scatter_info *);
+extern tree vect_check_gather_scatter_1 (gimple *, tree, bool, struct loop *,
+ tree, tree *, tree *, int *);
+
extern bool vect_analyze_data_refs (vec_info *, int *);
extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree,
tree *, gimple_stmt_iterator *,
@@ -1125,6 +1128,7 @@ extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
extern tree vect_create_addr_base_for_vector_ref (gimple *, gimple_seq *,
tree, struct loop *,
tree = NULL_TREE);
+extern bool vect_mark_for_runtime_alias_test_1 (ddr_p, loop *);
/* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */
@@ -1136,6 +1140,8 @@ extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */
extern void vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *);
+extern bool vect_analyze_loop_form_1 (struct loop *, gcond **, tree *, tree *,
+ gcond **);
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
slp_tree, int, gimple **);
extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *,
@@ -1147,6 +1153,7 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *,
stmt_vector_for_cost *,
stmt_vector_for_cost *);
+extern bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *);
/* In tree-vect-slp.c. */
extern void vect_free_slp_instance (slp_instance);
@@ -1177,5 +1184,7 @@ void vect_pattern_recog (vec_info *);
unsigned vectorize_loops (void);
void vect_destroy_datarefs (vec_info *);
bool vect_stmt_in_region_p (vec_info *, gimple *);
+extern void fold_loop_vectorized_call (gimple *, tree);
+extern gimple *vect_loop_vectorized_call (struct loop *);
#endif /* GCC_TREE_VECTORIZER_H */