Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- gcc/tree-vectorizer.c 3403
1 file changed, 3403 insertions, 0 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
new file mode 100644
index 00000000000..d01627223c8
--- /dev/null
+++ b/gcc/tree-vectorizer.c
@@ -0,0 +1,3403 @@
+/* Loop Vectorization
+ Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ Contributed by Dorit Naishlos <dorit@il.ibm.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+/* Loop Vectorization Pass.
+
+   This pass tries to vectorize loops. This first implementation focuses on
+   simple inner-most loops, with no conditional control flow, and a set of
+   simple operations whose vector form can be expressed using existing
+   tree codes (PLUS, MULT, etc.).
+
+ For example, the vectorizer transforms the following simple loop:
+
+ short a[N]; short b[N]; short c[N]; int i;
+
+ for (i=0; i<N; i++){
+ a[i] = b[i] + c[i];
+ }
+
+   as if it were manually vectorized by rewriting the source code into:
+
+ typedef int __attribute__((mode(V8HI))) v8hi;
+ short a[N]; short b[N]; short c[N]; int i;
+ v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
+ v8hi va, vb, vc;
+
+ for (i=0; i<N/8; i++){
+ vb = pb[i];
+ vc = pc[i];
+ va = vb + vc;
+ pa[i] = va;
+ }
+
+   The main entry to this pass is vectorize_loops(), in which
+   the vectorizer applies a set of analyses to a given set of loops,
+   followed by the actual vectorization transformation for the loops that
+   successfully passed the analysis phase.
+
+ Throughout this pass we make a distinction between two types of
+ data: scalars (which are represented by SSA_NAMES), and data-refs. These
+ are handled separately both by the analyzer and the loop-transformer.
+   Currently, the vectorizer only supports simple data-refs which are
+   limited to ARRAY_REFs that represent one-dimensional arrays whose base
+   is an array (not a pointer), and that have a simple (consecutive)
+   access pattern.
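+
+   For example, given 'int a[N]' and 'int *p', an access like 'a[i]'
+   (one-dimensional, array base, consecutive) is supported, whereas
+   'p[i]' (pointer base), 'a[i][j]' (multidimensional) and 'a[2*i]'
+   (non-consecutive) are not.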
+
+ Analysis phase:
+ ===============
+   The driver for the analysis phase is vect_analyze_loop(), which
+   applies a set of loop analyses. Some of the analyses rely on the
+ monotonic evolution analyzer developed by Sebastian Pop.
+
+ During the analysis phase the vectorizer records some information
+ per stmt in a stmt_vec_info which is attached to each stmt in the loop,
+ as well as general information about the loop as a whole, which is
+ recorded in a loop_vec_info struct attached to each loop.
+
+ Transformation phase:
+ =====================
+ The loop transformation phase scans all the stmts in the loop, and
+ creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
+   the loop that needs to be vectorized. It inserts the vector code sequence
+ just before the scalar stmt S, and records a pointer to the vector code
+ in STMT_VINFO_VEC_STMT (stmt_info) (where stmt_info is the stmt_vec_info
+ struct that is attached to S). This pointer is used for the vectorization
+   of subsequent stmts which use the defs of stmt S. Stmt S is removed
+ only if it has side effects (like changing memory). If stmt S does not
+ have side effects, we currently rely on dead code elimination for
+ removing it.
+
+ For example, say stmt S1 was vectorized into stmt VS1:
+
+ VS1: vb = px[i];
+ S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
+ S2: a = b;
+
+ To vectorize stmt S2, the vectorizer first finds the stmt that defines
+ the operand 'b' (S1), and gets the relevant vector def 'vb' from the
+   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
+ resulting sequence would be:
+
+ VS1: vb = px[i];
+ S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
+ VS2: va = vb;
+ S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
+
+   Operands that are not SSA_NAMEs are currently limited to array
+ references appearing in load/store operations (like 'x[i]' in S1), and
+ are handled differently.
+
+ Target modelling:
+ =================
+   Currently the only target-specific information that is used is the
+   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD", and a target hook
+   "vectype_for_scalar_type" that, for a given (scalar) machine mode, returns
+   the vector machine_mode to be used. Targets that can support different
+   sizes of vectors will, for now, need to specify a single value for
+   "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
+
+   Since we only vectorize operations whose vector form can be
+   expressed using existing tree codes, to verify that an operation is
+   supported the vectorizer checks the relevant optab at the relevant
+   machine_mode (e.g., add_optab->handlers[(int) V8HImode].insn_code). If
+   the value found is CODE_FOR_nothing, then there's no target support, and
+   we can't vectorize the stmt. Otherwise, the stmt is transformed.
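+
+   For example, a rough sketch of the support check for a V8HI addition
+   (this is essentially what vect_is_supportable_op does):
+
+     optab = add_optab;
+     vec_mode = V8HImode;
+     if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+       ... the stmt is not vectorizable ...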
+
+
+ For additional information on this project see:
+ http://gcc.gnu.org/projects/tree-ssa/vectorization.html
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-fold-const.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-vectorizer.h"
+#include "tree-pass.h"
+
+/* CHECKME: check for unnecessary include files. */
+
+/* Main analysis functions. */
+static loop_vec_info vect_analyze_loop (struct loop *);
+static loop_vec_info vect_analyze_loop_form (struct loop *);
+static bool vect_analyze_data_refs (loop_vec_info);
+static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
+static bool vect_analyze_scalar_cycles (loop_vec_info);
+static bool vect_analyze_data_ref_dependences (loop_vec_info);
+static bool vect_analyze_data_ref_accesses (loop_vec_info);
+static bool vect_analyze_data_refs_alignment (loop_vec_info);
+static void vect_compute_data_refs_alignment (loop_vec_info);
+static bool vect_analyze_operations (loop_vec_info);
+
+/* Main code transformation functions. */
+static void vect_transform_loop (loop_vec_info);
+static void vect_transform_loop_bound (loop_vec_info);
+static bool vect_transform_stmt (tree, block_stmt_iterator *);
+static tree vect_transform_load (tree, block_stmt_iterator *);
+static tree vect_transform_store (tree, block_stmt_iterator *);
+static tree vect_transform_op (tree, block_stmt_iterator *);
+static tree vect_transform_assignment (tree, block_stmt_iterator *);
+static void vect_align_data_ref (tree, tree);
+static void vect_enhance_data_refs_alignment (loop_vec_info);
+
+/* Utility functions for the analyses. */
+static bool vect_is_supportable_op (tree);
+static bool vect_is_supportable_store (tree);
+static bool vect_is_supportable_load (tree);
+static bool vect_is_supportable_assignment (tree);
+static bool vect_is_simple_use (tree, struct loop *);
+static bool exist_non_indexing_operands_for_use_p (tree, tree);
+static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
+static void vect_mark_relevant (varray_type, tree);
+static bool vect_stmt_relevant_p (tree, loop_vec_info);
+static tree vect_get_loop_niters (struct loop *, int *);
+static void vect_compute_data_ref_alignment
+ (struct data_reference *, loop_vec_info);
+static bool vect_analyze_data_ref_access (struct data_reference *);
+static bool vect_analyze_data_ref_dependence
+ (struct data_reference *, struct data_reference *);
+static bool vect_get_array_first_index (tree, int *);
+static bool vect_force_dr_alignment_p (struct data_reference *);
+
+/* Utility functions for the code transformation. */
+static tree vect_create_destination_var (tree, tree);
+static tree vect_create_data_ref (tree, tree, block_stmt_iterator *);
+static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *);
+static tree get_vectype_for_scalar_type (tree);
+static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
+static tree vect_get_vec_def_for_operand (tree, tree);
+static tree vect_init_vector (tree, tree);
+
+/* General utility functions (CHECKME: where do they belong?). */
+static tree get_array_base (tree);
+
+/* Utilities for creation and deletion of vec_info structs. */
+loop_vec_info new_loop_vec_info (struct loop *loop);
+void destroy_loop_vec_info (loop_vec_info);
+stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
+
+/* Define number of arguments for each tree code. */
+
+#define DEFTREECODE(SYM, STRING, TYPE, NARGS) NARGS,
+
+int tree_nargs[] = {
+#include "tree.def"
+
+};
+
+#undef DEFTREECODE
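+
+/* For example, the tree.def entry for PLUS_EXPR (roughly
+   DEFTREECODE (PLUS_EXPR, "plus_expr", '2', 2)) contributes a 2, so
+   tree_nargs[PLUS_EXPR] == 2, i.e. PLUS_EXPR is a binary_op. */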
+
+/* Function new_stmt_vec_info.
+
+ Create and initialize a new stmt_vec_info struct for STMT. */
+
+stmt_vec_info
+new_stmt_vec_info (tree stmt, struct loop *loop)
+{
+ stmt_vec_info res;
+ res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
+
+ STMT_VINFO_TYPE (res) = undef_vec_info_type;
+ STMT_VINFO_STMT (res) = stmt;
+ STMT_VINFO_LOOP (res) = loop;
+ STMT_VINFO_RELEVANT_P (res) = 0;
+ STMT_VINFO_VECTYPE (res) = NULL;
+ STMT_VINFO_VEC_STMT (res) = NULL;
+ STMT_VINFO_DATA_REF (res) = NULL;
+
+ return res;
+}
+
+
+/* Function new_loop_vec_info.
+
+ Create and initialize a new loop_vec_info struct for LOOP, as well as
+ stmt_vec_info structs for all the stmts in LOOP. */
+
+loop_vec_info
+new_loop_vec_info (struct loop *loop)
+{
+ loop_vec_info res;
+ basic_block *bbs;
+ block_stmt_iterator si;
+ unsigned int i;
+
+ res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
+
+ bbs = get_loop_body (loop);
+
+ /* Create stmt_info for all stmts in the loop. */
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_ann_t ann;
+
+ get_stmt_operands (stmt);
+ ann = stmt_ann (stmt);
+ set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
+ }
+ }
+
+ LOOP_VINFO_LOOP (res) = loop;
+ LOOP_VINFO_BBS (res) = bbs;
+ LOOP_VINFO_EXIT_COND (res) = NULL;
+ LOOP_VINFO_NITERS (res) = -1;
+ LOOP_VINFO_VECTORIZABLE_P (res) = 0;
+ LOOP_VINFO_VECT_FACTOR (res) = 0;
+ VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
+ "loop_write_datarefs");
+ VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
+ "loop_read_datarefs");
+ return res;
+}
+
+
+/* Function destroy_loop_vec_info. */
+
+void
+destroy_loop_vec_info (loop_vec_info loop_vinfo)
+{
+ struct loop *loop;
+ basic_block *bbs;
+ int nbbs;
+ block_stmt_iterator si;
+ int j;
+
+ if (!loop_vinfo)
+ return;
+
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+ bbs = LOOP_VINFO_BBS (loop_vinfo);
+ nbbs = loop->num_nodes;
+
+ for (j = 0; j < nbbs; j++)
+ {
+ basic_block bb = bbs[j];
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_ann_t ann = stmt_ann (stmt);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ free (stmt_info);
+ set_stmt_info (ann, NULL);
+ }
+ }
+
+ free (LOOP_VINFO_BBS (loop_vinfo));
+ varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
+ varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
+
+ free (loop_vinfo);
+}
+
+
+/* Function vect_force_dr_alignment_p.
+
+   Return whether the alignment of the data structure accessed by DR can be
+   forced. */
+
+static bool
+vect_force_dr_alignment_p (struct data_reference *dr)
+{
+ tree ref = DR_REF (dr);
+ tree array_base;
+
+ if (TREE_CODE (ref) != ARRAY_REF)
+ return false;
+
+ array_base = get_array_base (ref);
+
+ /* We want to make sure that we can force alignment of
+ the data structure that is being accessed, because we do not
+ handle misalignment yet.
+
+ CHECKME: Is this a correct check for this purpose?
+ CHECKME: This is a very strict check.
+ CHECKME: Can we force the alignment of external decls?
+ */
+
+ if (TREE_CODE (TREE_TYPE (array_base)) != ARRAY_TYPE
+ || TREE_CODE (array_base) != VAR_DECL
+ || DECL_EXTERNAL (array_base))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "unhandled ptr-based array ref\n");
+ if (TREE_CODE (array_base) == VAR_DECL && DECL_EXTERNAL (array_base))
+	    fprintf (dump_file, "\nextern decl.\n");
+ }
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function vect_get_new_vect_var.
+
+   Create and return a new variable of type TYPE.
+   The current naming scheme prepends the prefix "vect_" or "vect_p" to
+   vectorizer generated variable names, followed by NAME if given.
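+
+   E.g., for NAME "b" the new variable is named "vect_b" when VAR_KIND is
+   vect_simple_var, and "vect_pb" when it is vect_pointer_var.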
+
+ CHECKME: naming scheme ok? */
+
+static tree
+vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
+{
+ const char *prefix;
+ int prefix_len;
+ char *vect_var_name;
+ tree new_vect_var;
+
+ if (var_kind == vect_simple_var)
+ prefix = "vect_";
+ else
+ prefix = "vect_p";
+
+ prefix_len = strlen (prefix);
+
+ if (name)
+ {
+ vect_var_name = (char *) xmalloc (strlen (name) + prefix_len + 1);
+ sprintf (vect_var_name, "%s%s", prefix, name);
+ }
+ else
+ {
+ vect_var_name = (char *) xmalloc (prefix_len + 1);
+ sprintf (vect_var_name, "%s", prefix);
+ }
+
+ new_vect_var = create_tmp_var (type, vect_var_name);
+
+ free (vect_var_name);
+ return new_vect_var;
+}
+
+
+/* Function vect_create_index_for_array_ref.
+
+ Create an offset/index to be used to access a memory location.
+ Input:
+
+ STMT: The stmt that contains a data reference to the memory location.
+
+ BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
+ function can be added here, or in the loop pre-header.
+
+ Output:
+
+ Return an index that will be used to index an array, using a pointer as
+ a base.
+
+   FORNOW: we are not trying to be efficient, and just create the code
+   sequence each time from scratch, even if the same offset can be reused.
+ TODO: record the index in the array_ref_info or the stmt info and reuse
+ it.
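+
+   For example (a sketch): for an access 'a[i]' starting at a[0], in a
+   loop vectorized by a factor of 8, the new index starts at 0/8 = 0 and
+   is bumped by 1 every vector iteration, so the vector accesses are
+   pa[0], pa[1], pa[2], etc.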
+
+ FORNOW: We are only handling array accesses with step 1. */
+
+static tree
+vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ struct loop *loop = STMT_VINFO_LOOP (stmt_info);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ tree expr = DR_REF (dr);
+ varray_type access_fns = DR_ACCESS_FNS (dr);
+ tree access_fn;
+ tree scalar_indx;
+ int init_val, step_val;
+ tree init, step;
+ loop_vec_info loop_info = loop->aux;
+ int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info);
+ bool ok;
+ int array_first_index;
+ int vec_init_val;
+ tree indx_before_incr, indx_after_incr;
+
+ if (TREE_CODE (expr) != ARRAY_REF)
+ abort ();
+
+ /* FORNOW: handle only one dimensional arrays.
+ This restriction will be relaxed in the future. */ /* CHECKME */
+ if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
+ abort ();
+
+ access_fn = DR_ACCESS_FN (dr, 0);
+
+ if (!vect_is_simple_iv_evolution (loop_num (loop), access_fn, &init, &step,
+ true))
+ abort ();
+
+ if (TREE_CODE (init) != INTEGER_CST || TREE_CODE (step) != INTEGER_CST)
+ abort ();
+
+ if (TREE_INT_CST_HIGH (init) != 0 || TREE_INT_CST_HIGH (step) != 0)
+ abort ();
+
+ init_val = TREE_INT_CST_LOW (init);
+ step_val = TREE_INT_CST_LOW (step);
+
+
+ /** Handle initialization. **/
+
+ scalar_indx = TREE_OPERAND (expr, 1);
+
+ /* The actual index depends on the (mis)alignment of the access.
+ FORNOW: we verify that both the array base and the access are
+ aligned, so the index in the vectorized access is simply
+ init_val/vectorization_factor. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "creating update chain:\n");
+
+ ok = vect_get_array_first_index (expr, &array_first_index);
+ if (!ok)
+ abort ();
+ vec_init_val = array_first_index +
+ (init_val - array_first_index)/vectorization_factor;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "vec_init_indx = %d\n", vec_init_val);
+
+ init = build_int_2 (vec_init_val, 0);
+ step = integer_one_node;
+
+ /* CHECKME: assuming that bsi_insert is used with BSI_NEW_STMT */
+
+ create_iv (init, step, NULL_TREE, loop, bsi, false,
+ &indx_before_incr, &indx_after_incr);
+
+ return indx_before_incr;
+}
+
+
+/* Function get_vectype_for_scalar_type.
+
+ Return the vector type corresponding to SCALAR_TYPE as supported
+ by the target. */
+
+static tree
+get_vectype_for_scalar_type (tree scalar_type)
+{
+ enum machine_mode inner_mode;
+ enum machine_mode vec_mode;
+ int nbytes;
+ int nunits;
+
+ /* FORNOW: Only a single vector size per target is expected. */
+
+ inner_mode = TYPE_MODE (scalar_type);
+ nbytes = GET_MODE_SIZE (inner_mode);
+
+ if (nbytes == 0)
+ return NULL_TREE;
+
+ nunits = UNITS_PER_SIMD_WORD / nbytes;
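+
+  /* E.g., for a 'short' scalar (2 bytes) and a hypothetical
+     UNITS_PER_SIMD_WORD of 16, nunits is 8, and the mode loop below
+     searches for an 8-unit vector mode such as V8HImode. */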
+
+ if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
+ vec_mode = MIN_MODE_VECTOR_FLOAT;
+ else
+ vec_mode = MIN_MODE_VECTOR_INT;
+
+ /* CHECKME: This duplicates some of the functionality in build_vector_type;
+ could have directly called build_vector_type_for_mode if exposed. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nget vectype for scalar type: ");
+ print_generic_expr (dump_file, scalar_type, TDF_SLIM);
+ fprintf (dump_file, "\n");
+ }
+
+ for (; vec_mode != VOIDmode ; vec_mode = GET_MODE_WIDER_MODE (vec_mode))
+ if (GET_MODE_NUNITS (vec_mode) == nunits
+ && GET_MODE_INNER (vec_mode) == inner_mode
+ && VECTOR_MODE_SUPPORTED_P (vec_mode))
+ return build_vector_type (scalar_type, nunits);
+
+ return NULL_TREE;
+}
+
+
+/* Function vect_align_data_ref.
+
+   Handle the alignment of a memory access.
+
+ FORNOW: Make sure the array is properly aligned. The vectorizer
+ currently does not handle unaligned memory accesses.
+ This restriction will be relaxed in the future.
+
+   FORNOW: data_ref is an array_ref whose alignment can be forced; i.e.,
+ the base of the ARRAY_REF is not a pointer but an array.
+ This restriction will be relaxed in the future.
+
+ FORNOW: The array is being accessed starting at location 'init';
+ We limit vectorization to cases in which init % NUNITS == 0
+ (where NUNITS = GET_MODE_NUNITS (TYPE_MODE (vectype))).
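+   For example, with V8HI vectors (NUNITS == 8) an access that starts
+   at a[8] satisfies this restriction, while one that starts at a[3]
+   does not.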
+ This restriction will be relaxed in the future. */
+
+static void
+vect_align_data_ref (tree ref, tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree array_base = get_array_base (ref);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+
+ if (!aligned_access_p (dr))
+    abort (); /* FORNOW, can't handle misaligned accesses. */
+
+  /* The access is aligned, but some accesses are marked aligned under the
+ assumption that alignment of the base of the data structure will be
+ forced: */
+
+ if (vect_force_dr_alignment_p (dr))
+ {
+ if (DECL_ALIGN (array_base) < TYPE_ALIGN (vectype))
+ {
+ /* CHECKME:
+ - is this the way to force the alignment of an array base?
+ - can it be made to also work for extern decls? */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "\nforce alignment. before: scalar/vec type_align = %d/%d\n",
+ DECL_ALIGN (array_base), TYPE_ALIGN (vectype));
+
+ DECL_ALIGN (array_base) = TYPE_ALIGN (vectype);
+ }
+ }
+}
+
+
+/* Function vect_create_data_ref.
+
+ Create a memory reference expression for vector access, to be used in a
+ vector load/store stmt.
+
+ Input:
+ STMT: the stmt that references memory
+ FORNOW: a load/store of the form 'var = a[i]'/'a[i] = var'.
+ OP: the operand in STMT that is the memory reference
+ FORNOW: an array_ref.
+ BSI: the block_stmt_iterator where STMT is. Any new stmts created by this
+ function can be added here.
+
+ Output:
+ 1. Declare a new ptr to vector_type, and have it point to the array base.
+ For example, for vector of type V8HI:
+ v8hi *p0;
+ p0 = (v8hi *)&a;
+
+   2. Return the expression '(*p0)[idx]',
+ where idx is the index used for the scalar expr.
+
+ FORNOW: handle only simple array accesses (step 1). */
+
+static tree
+vect_create_data_ref (tree ref, tree stmt, block_stmt_iterator *bsi)
+{
+ tree new_base;
+ tree data_ref;
+ tree idx;
+ tree vec_stmt;
+ tree new_temp;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree ptr_type;
+ tree array_ptr;
+ tree array_base;
+ vdef_optype vdefs = STMT_VDEF_OPS (stmt);
+ vuse_optype vuses = STMT_VUSE_OPS (stmt);
+ int nvuses = 0, nvdefs = 0;
+ int i;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "create array_ref of type:\n");
+ print_generic_expr (dump_file, vectype, TDF_SLIM);
+ }
+
+ vect_align_data_ref (ref, stmt);
+ array_base = get_array_base (ref);
+
+ /*** create: vectype *p; ***/
+ ptr_type = build_pointer_type (vectype);
+ array_ptr = vect_get_new_vect_var (ptr_type, vect_pointer_var,
+ get_name (array_base));
+ add_referenced_tmp_var (array_ptr);
+ if (TREE_CODE (array_base) == VAR_DECL)
+ {
+ get_var_ann (array_ptr)->type_mem_tag = array_base;
+ bitmap_set_bit (vars_to_rename, var_ann (array_base)->uid);
+ }
+ else
+ {
+ /* FORNOW. This restriction will be relaxed in the future. */
+ abort ();
+ }
+
+ /* CHECKME: update name_mem_tag as well? */
+
+ /* Also mark for renaming all aliased variables: */ /* CHECKME */
+ if (vuses)
+ nvuses = NUM_VUSES (vuses);
+ if (vdefs)
+ nvdefs = NUM_VDEFS (vdefs);
+ for (i = 0; i < nvuses; i++)
+ {
+      tree use = VUSE_OP (vuses, i);
+ if (TREE_CODE (use) == SSA_NAME)
+ bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
+ }
+ for (i = 0; i < nvdefs; i++)
+ {
+ tree def = VDEF_RESULT (vdefs, i);
+ if (TREE_CODE (def) == SSA_NAME)
+ bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
+ }
+
+ /*** create: p = (vectype *)&a; ***/
+ vec_stmt = build (MODIFY_EXPR, void_type_node, array_ptr,
+ build1 (NOP_EXPR, ptr_type,
+ build1 (ADDR_EXPR,
+ build_pointer_type (TREE_TYPE (array_base)), array_base)));
+ TREE_ADDRESSABLE (array_base) = 1;
+ new_temp = make_ssa_name (array_ptr, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+ idx = vect_create_index_for_array_ref (stmt, bsi);
+
+ /*** create data ref: '(*p)[idx]' ***/
+
+ new_base = build1 (INDIRECT_REF, build_array_type (vectype, 0),
+ TREE_OPERAND (vec_stmt, 0));
+ data_ref = build (ARRAY_REF, vectype, new_base, idx);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_generic_expr (dump_file, data_ref, TDF_SLIM);
+
+ return data_ref;
+}
+
+
+/* Function vect_create_destination_var
+
+ Create a new temporary of type VECTYPE. */
+
+static tree
+vect_create_destination_var (tree scalar_dest, tree vectype)
+{
+ tree vec_dest;
+ const char *new_name;
+
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ abort ();
+
+ new_name = get_name (scalar_dest);
+ if (!new_name)
+ new_name = "var_";
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
+ add_referenced_tmp_var (vec_dest);
+
+ /* FIXME: introduce new type. */
+ TYPE_ALIAS_SET (TREE_TYPE (vec_dest)) =
+ TYPE_ALIAS_SET (TREE_TYPE (scalar_dest));
+
+ return vec_dest;
+}
+
+
+/* Function vect_init_vector.
+
+   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
+   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will
+   be used in the vectorization of STMT. */
+
+static tree
+vect_init_vector (tree stmt, tree vector_var)
+{
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
+ block_stmt_iterator pre_header_bsi;
+ tree new_var;
+ tree init_stmt;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ tree vec_oprnd;
+
+ new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
+ add_referenced_tmp_var (new_var);
+ bitmap_set_bit (vars_to_rename, var_ann (new_var)->uid);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_generic_expr (dump_file, vector_var, TDF_SLIM);
+ fprintf (dump_file, "\n");
+ }
+
+ init_stmt = build (MODIFY_EXPR, vectype, new_var, vector_var);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_generic_expr (dump_file, init_stmt, TDF_SLIM);
+ fprintf (dump_file, "\n");
+ }
+
+ /* CHECKME: Is there a utility for inserting code at the end of a basic block? */
+ pre_header_bsi = bsi_last (loop->pre_header);
+ if (!bsi_end_p (pre_header_bsi)
+ && is_ctrl_stmt (bsi_stmt (pre_header_bsi)))
+ bsi_insert_before (&pre_header_bsi, init_stmt, BSI_NEW_STMT);
+ else
+ bsi_insert_after (&pre_header_bsi, init_stmt, BSI_NEW_STMT);
+
+ vec_oprnd = TREE_OPERAND (init_stmt, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_generic_expr (dump_file, vec_oprnd, TDF_SLIM);
+ fprintf (dump_file, "\n");
+ }
+
+ return vec_oprnd;
+}
+
+/* Function vect_get_vec_def_for_operand.
+
+ OP is an operand in STMT. This function returns a (vector) def that will be
+ used in the vectorized counterpart of STMT.
+
+ In the case that OP is an SSA_NAME which is defined in the loop, then
+ STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
+
+ In case OP is an invariant or constant, a new stmt that creates a vector def
+ needs to be introduced. */
+
+static tree
+vect_get_vec_def_for_operand (tree op, tree stmt)
+{
+ tree vec_oprnd;
+
+ if (!op)
+ abort ();
+
+ if (TREE_CODE (op) == SSA_NAME)
+ {
+ tree vec_stmt;
+ tree def_stmt;
+ stmt_vec_info def_stmt_info = NULL;
+
+ def_stmt = SSA_NAME_DEF_STMT (op);
+ def_stmt_info = vinfo_for_stmt (def_stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt:\n");
+ print_generic_expr (dump_file, def_stmt, TDF_SLIM);
+ }
+
+ if (!def_stmt_info)
+ {
+ /* op is defined outside the loop (it is loop invariant).
+ Create 'vec_inv = {inv,inv,..,inv}' */
+
+ tree vec_inv;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+ basic_block bb = bb_for_stmt (def_stmt);
+ struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
+ tree t = NULL_TREE;
+ tree def;
+ int i;
+
+ /* Build a tree with vector elements. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nCreate vector_inv.\n");
+
+ if (TREE_CODE (def_stmt) == PHI_NODE)
+ {
+ if (flow_bb_inside_loop_p (loop, bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nUnsupported reduction.\n");
+ abort ();
+ }
+ def = PHI_RESULT (def_stmt);
+ }
+ else if (TREE_CODE (def_stmt) == NOP_EXPR)
+ {
+ tree arg = TREE_OPERAND (def_stmt, 0);
+ if (TREE_CODE (arg) != INTEGER_CST && TREE_CODE (arg) != REAL_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nUnsupported NOP_EXPR.\n");
+ abort ();
+ }
+ def = op;
+ }
+ else
+ def = TREE_OPERAND (def_stmt, 0);
+
+ for (i = nunits - 1; i >= 0; --i)
+ {
+ t = tree_cons (NULL_TREE, def, t);
+ }
+
+ vec_inv = build_constructor (vectype, t); /* CHECKME */
+ return vect_init_vector (stmt, vec_inv);
+ }
+
+
+  /* op is defined inside the loop. Get the def from the vectorized
+     stmt. */
+ vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
+
+ if (!vec_stmt)
+ abort ();
+
+ /* CHECKME: any cases where the def we want is not TREE_OPERAND 0? */
+ vec_oprnd = TREE_OPERAND (vec_stmt, 0);
+
+ return vec_oprnd;
+ }
+
+ if (TREE_CODE (op) == INTEGER_CST
+ || TREE_CODE (op) == REAL_CST)
+ {
+ /* Create 'vect_cst_ = {cst,cst,...,cst}' */
+
+ tree vec_cst;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+ tree t = NULL_TREE;
+ int i;
+
+ /* Build a tree with vector elements. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nCreate vector_cst.\n");
+ for (i = nunits - 1; i >= 0; --i)
+ {
+ t = tree_cons (NULL_TREE, op, t);
+ }
+ vec_cst = build_vector (vectype, t);
+ return vect_init_vector (stmt, vec_cst);
+ }
+
+ return NULL_TREE;
+}
+
+
+/* Function vect_transform_assignment.
+
+ STMT performs an assignment (copy). Create a vectorized stmt to replace it,
+ and insert it at BSI. */
+
+static tree
+vect_transform_assignment (tree stmt, block_stmt_iterator *bsi)
+{
+ tree vec_stmt;
+ tree vec_dest;
+ tree scalar_dest;
+ tree op;
+ tree vec_oprnd;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree new_temp;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "transform assignment\n");
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ abort ();
+
+ /** Handle def. **/
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ abort ();
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+ /** Handle use - get the vectorized def from the defining stmt. **/
+
+ op = TREE_OPERAND (stmt, 1);
+
+ vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
+ if (! vec_oprnd)
+ abort ();
+
+  /** Arguments are ready. Create the new vector stmt. **/
+
+ vec_stmt = build (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
+ new_temp = make_ssa_name (vec_dest, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "add new stmt\n");
+ print_generic_stmt (dump_file, vec_stmt, TDF_SLIM);
+ }
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+ return vec_stmt;
+}
+
+
+/* Function vect_transform_op.
+
+ STMT performs a binary or unary operation. Create a vectorized stmt to
+ replace it, and insert it at BSI. */
+
+static tree
+vect_transform_op (tree stmt, block_stmt_iterator *bsi)
+{
+ tree vec_stmt;
+ tree vec_dest;
+ tree scalar_dest;
+ tree operation;
+ tree op0, op1=NULL;
+ tree vec_oprnd0, vec_oprnd1=NULL;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ enum tree_code code;
+ tree new_temp;
+ int op_type;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "transform op\n");
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ abort ();
+
+ operation = TREE_OPERAND (stmt, 1);
+
+ /** Handle def. **/
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ abort ();
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+ /** Handle uses - get the vectorized defs from the defining stmts. **/
+
+ /** Distinguish between binary and unary operations. **/
+
+ op_type = tree_nargs[TREE_CODE (operation)];
+
+ if (op_type != unary_op && op_type != binary_op)
+ abort ();
+
+ op0 = TREE_OPERAND (operation, 0);
+ if (op_type == binary_op)
+ op1 = TREE_OPERAND (operation, 1);
+
+ vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
+ if (! vec_oprnd0)
+ abort ();
+
+  if (op_type == binary_op)
+ {
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
+ if (! vec_oprnd1)
+ abort ();
+ }
+
+  /** Arguments are ready. Create the new vector stmt. **/
+
+ code = TREE_CODE (operation);
+ if (op_type == binary_op)
+ vec_stmt = build (MODIFY_EXPR, vectype, vec_dest,
+ build (code, vectype, vec_oprnd0, vec_oprnd1));
+ else
+ vec_stmt = build (MODIFY_EXPR, vectype, vec_dest,
+ build1 (code, vectype, vec_oprnd0));
+
+ new_temp = make_ssa_name (vec_dest, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "add new stmt\n");
+ print_generic_stmt (dump_file, vec_stmt, TDF_SLIM);
+ }
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+ return vec_stmt;
+}
+
+
+/* Function vect_transform_store.
+
+ STMT is a store to memory. Create a vectorized stmt to replace it,
+ and insert it at BSI. */
+
+static tree
+vect_transform_store (tree stmt, block_stmt_iterator *bsi)
+{
+ tree scalar_dest;
+ tree vec_stmt;
+ tree data_ref;
+ tree op;
+ tree vec_oprnd1;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "transform store\n");
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ abort ();
+
+ /** Handle def. **/
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+
+ if (TREE_CODE (scalar_dest) != ARRAY_REF)
+ abort ();
+
+ data_ref = vect_create_data_ref (scalar_dest, stmt, bsi);
+ if (!data_ref)
+ abort ();
+
+ /** Handle use - get the vectorized def from the defining stmt. **/
+
+ op = TREE_OPERAND (stmt, 1);
+
+ vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
+ if (! vec_oprnd1)
+ abort ();
+
+  /** Arguments are ready. Create the new vector stmt. **/
+
+ vec_stmt = build (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "add new stmt\n");
+ print_generic_stmt (dump_file, vec_stmt, TDF_SLIM);
+ }
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+ if (stmt != bsi_stmt (*bsi))
+ {
+ /* This is expected when an update chain for a data-ref index has been
+ created (by vect_create_index_for_array_ref). The current stmt
+ sequence is as follows:
+
+ (i) some stmt
+ (i+1) vec_stmt (with a data_ref that uses index)
+ (i+2) stmt_to_update_index <-- bsi
+ (i+3) stmt
+
+ The iterator bsi should be bumped to point to stmt at location (i+3)
+ because this is what the driver vect_transform_loop expects. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "update chain:\n");
+ print_generic_stmt (dump_file, bsi_stmt (*bsi), TDF_SLIM);
+ }
+ bsi_next (bsi);
+ }
+
+  /* The driver function vect_transform_loop expects bsi to point to the
+     last scalar stmt that was vectorized. */
+ if (stmt != bsi_stmt (*bsi))
+ abort ();
+
+ return vec_stmt;
+}
+
+
+/* Function vect_transform_load.
+
+ STMT is a load from memory. Create a vectorized stmt to replace it,
+ and insert it at BSI. */
+
+static tree
+vect_transform_load (tree stmt, block_stmt_iterator *bsi)
+{
+ tree vec_stmt;
+ tree scalar_dest;
+ tree vec_dest = NULL;
+ tree data_ref = NULL;
+ tree op;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree new_temp;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "transform load\n");
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ abort ();
+
+ /** Handle def. **/
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ abort ();
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ if (!vec_dest)
+ abort ();
+
+ /** Handle use. **/
+
+ op = TREE_OPERAND (stmt, 1);
+
+ if (TREE_CODE (op) != ARRAY_REF)
+ abort ();
+
+ data_ref = vect_create_data_ref (op, stmt, bsi);
+
+ if (!data_ref)
+ abort ();
+
+  /** Arguments are ready. Create the new vector stmt. **/
+
+ vec_stmt = build (MODIFY_EXPR, vectype, vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "add new stmt\n");
+ print_generic_stmt (dump_file, vec_stmt, TDF_SLIM);
+ }
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+ if (stmt != bsi_stmt (*bsi))
+ {
+ /* This is expected when an update chain for a data-ref index has been
+ created (by vect_create_index_for_array_ref). The current stmt
+ sequence is as follows:
+
+ (i) some stmt
+ (i+1) vec_stmt (with a data_ref that uses index)
+ (i+2) stmt_to_update_index <-- bsi
+ (i+3) stmt
+
+ The iterator bsi should be bumped to point to stmt at location (i+3)
+ because this is what the driver vect_transform_loop expects. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "update chain:\n");
+ print_generic_stmt (dump_file, bsi_stmt (*bsi), TDF_SLIM);
+ }
+ bsi_next (bsi);
+ }
+
+  /* The driver function vect_transform_loop expects bsi to point to the
+     last scalar stmt that was vectorized. */
+ if (stmt != bsi_stmt (*bsi))
+ abort ();
+
+ return vec_stmt;
+}
+
+
+/* Function vect_transform_stmt.
+
+ Create a vectorized stmt to replace STMT, and insert it at BSI. */
+
+static bool
+vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
+{
+ bool is_store = false;
+ tree vec_stmt = NULL;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ switch (STMT_VINFO_TYPE (stmt_info))
+ {
+ case op_vec_info_type:
+ vec_stmt = vect_transform_op (stmt, bsi);
+ break;
+
+ case assignment_vec_info_type:
+ vec_stmt = vect_transform_assignment (stmt, bsi);
+ break;
+
+ case load_vec_info_type:
+ vec_stmt = vect_transform_load (stmt, bsi);
+ break;
+
+ case store_vec_info_type:
+ vec_stmt = vect_transform_store (stmt, bsi);
+ is_store = true;
+ break;
+
+ default:
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "stmt not supported\n");
+ abort ();
+ }
+
+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+
+ return is_store;
+}
+
+
+/* Function vect_transform_loop_bound.
+
+ Create a new exit condition for the loop. */
+
+static void
+vect_transform_loop_bound (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ edge exit_edge = loop_exit_edge (loop, 0);
+ block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
+ tree indx_before_incr, indx_after_incr;
+ tree orig_cond_expr;
+ int old_N, vf;
+ tree cond_stmt;
+ tree new_loop_bound;
+
+ /* FORNOW: assuming the loop bound is known. */
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ abort ();
+
+ old_N = LOOP_VINFO_NITERS (loop_vinfo);
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+  /* FORNOW:
+     assuming the number of iterations is divisible by the vectorization
+     factor. */
+ if (old_N % vf)
+ abort ();
+
+ orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
+ if (!orig_cond_expr)
+ abort ();
+ if (orig_cond_expr != bsi_stmt (loop_exit_bsi))
+ abort ();
+
+ create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
+ &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
+
+ /* CHECKME: bsi_insert is using BSI_NEW_STMT. We need to bump it back
+ to point to the exit condition. */
+ bsi_next (&loop_exit_bsi);
+ if (bsi_stmt (loop_exit_bsi) != orig_cond_expr)
+ abort ();
+
+ /* new loop exit test: */
+ new_loop_bound = build_int_2 (old_N/vf, 0);
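+  /* E.g., a scalar loop of 16 iterations vectorized by a factor of 8
+     is now exited once the new iv reaches 16/8 = 2. */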
+ cond_stmt =
+ build (COND_EXPR, TREE_TYPE (orig_cond_expr),
+ build (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound),
+ TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
+
+ bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
+
+ /* remove old loop exit test: */
+ bsi_remove (&loop_exit_bsi);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
+}
+
+
+/* Function vect_transform_loop.
+
+ The analysis phase has determined that the loop is vectorizable.
+   Vectorize the loop - create vectorized stmts to replace the scalar
+ stmts in the loop, and update the loop exit condition. */
+
+static void
+vect_transform_loop (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+ int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ block_stmt_iterator si;
+ int i;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vec_transform_loop>>\n");
+
+  /* CHECKME: FORNOW the vectorizer supports only loops whose body consists
+     of one basic block + header. When the vectorizer supports more
+     involved loop forms, the order in which the BBs are traversed needs
+     to be considered. */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+
+ for (si = bsi_start (bb); !bsi_end_p (si);)
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_vec_info stmt_info;
+ tree vectype;
+ bool is_store;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n-----\nvectorizing statement:\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+
+ stmt_info = vinfo_for_stmt (stmt);
+ if (!stmt_info)
+ abort ();
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ bsi_next (&si);
+ continue;
+ }
+
+ /* FORNOW: Verify that all stmts operate on the same number of
+ units and no inner unrolling is necessary. */
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (GET_MODE_NUNITS (TYPE_MODE (vectype)) != vectorization_factor)
+ abort ();
+
+ /* -------- vectorize statement ------------ */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "transform statement.\n");
+
+ is_store = vect_transform_stmt (stmt, &si);
+
+ if (is_store)
+ {
+	      /* Free the attached stmt_vec_info and remove the stmt. */
+ stmt_ann_t ann = stmt_ann (stmt);
+ free (stmt_info);
+ set_stmt_info (ann, NULL);
+
+ bsi_remove (&si);
+ continue;
+ }
+
+ bsi_next (&si);
+
+ } /* stmts in BB */
+ } /* BBs in loop */
+
+
+ vect_transform_loop_bound (loop_vinfo);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<Success! loop vectorized.>>\n");
+}
+
+
+/* Function vect_is_simple_use.
+
+ Return whether the vectorization of a stmt, in LOOP, that uses OPERAND is
+   supportable. Operands that can't be vectorized yet are those defined
+   by a reduction operation or some other form of recurrence.
+   Other operands - those defined in the loop, constants and invariants -
+   are supported. */
+
+static bool
+vect_is_simple_use (tree operand, struct loop *loop)
+{
+ tree def_stmt;
+ basic_block bb;
+
+ if (!operand)
+ return false;
+
+ if (TREE_CODE (operand) == SSA_NAME)
+ {
+ def_stmt = SSA_NAME_DEF_STMT (operand);
+
+ if (def_stmt == NULL_TREE)
+ return false;
+
+ if (TREE_CODE (def_stmt) == NOP_EXPR)
+ {
+ tree arg = TREE_OPERAND (def_stmt, 0);
+
+ if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
+ return true;
+
+ return false;
+ }
+
+ bb = bb_for_stmt (def_stmt);
+ if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "use defined in loop phi - some form of reduction.\n");
+ return false;
+ }
+
+ return true;
+ }
+
+ if (TREE_CODE (operand) == INTEGER_CST
+ || TREE_CODE (operand) == REAL_CST)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Function vect_is_supportable_op.
+
+ Verify that STMT performs an operation that can be vectorized. */
+
+static bool
+vect_is_supportable_op (tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree operation;
+ enum tree_code code;
+ tree op;
+ enum machine_mode vec_mode;
+ optab optab;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ struct loop *loop = STMT_VINFO_LOOP (stmt_info);
+  int i, op_type;
+
+ /* Is op? */
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+ return false;
+
+ operation = TREE_OPERAND (stmt, 1);
+ code = TREE_CODE (operation);
+
+ switch (code)
+ {
+ case PLUS_EXPR:
+ optab = add_optab;
+ break;
+ case MULT_EXPR:
+ optab = smul_optab;
+ break;
+ case MINUS_EXPR:
+ optab = sub_optab;
+ break;
+ case BIT_AND_EXPR:
+ optab = and_optab;
+ break;
+ case BIT_XOR_EXPR:
+ optab = xor_optab;
+ break;
+ case BIT_IOR_EXPR:
+ optab = ior_optab;
+ break;
+ case BIT_NOT_EXPR:
+ optab = one_cmpl_optab;
+ break;
+ default:
+ return false;
+ }
+
+ /* Support only unary or binary operations. */
+
+ op_type = tree_nargs[code];
+ if (op_type != unary_op && op_type != binary_op)
+ return false;
+
+ for (i = 0; i < op_type; i++)
+ {
+ op = TREE_OPERAND (operation, i);
+ if (!vect_is_simple_use (op, loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "use not simple.\n");
+ return false;
+ }
+ }
+
+ /* Supportable by target? */
+
+ if (!optab)
+ return false;
+
+ vec_mode = TYPE_MODE (vectype);
+
+ if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "op not supported by target\n");
+ return false;
+ }
+
+ /* FORNOW: Not considering the cost. */
+
+ STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+
+ return true;
+}
+
+
+/* Function vect_is_supportable_store.
+
+ Verify that STMT performs a store to memory operation,
+ and can be vectorized. */
+
+static bool
+vect_is_supportable_store (tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree scalar_dest;
+ tree op;
+ struct loop *loop = STMT_VINFO_LOOP (stmt_info);
+
+ /* Is vectorizable store? */
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+
+ if (TREE_CODE (scalar_dest) != ARRAY_REF)
+ return false;
+
+ op = TREE_OPERAND (stmt, 1);
+
+ if (!vect_is_simple_use (op, loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "use not simple.\n");
+ return false;
+ }
+
+ if (!STMT_VINFO_DATA_REF (stmt_info))
+ return false;
+
+ /* Previous analysis steps have already verified that the data ref is
+ vectorizable (w.r.t data dependences, access pattern, etc). */
+
+ /* FORNOW: Not considering the cost. */
+
+ STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+
+ return true;
+}
+
+
+/* Function vect_is_supportable_load.
+
+ Verify that STMT performs a load from memory operation,
+ and can be vectorized. */
+
+static bool
+vect_is_supportable_load (tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree scalar_dest;
+ tree op;
+
+ /* Is vectorizable load? */
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ return false;
+
+ op = TREE_OPERAND (stmt, 1);
+
+ if (TREE_CODE (op) != ARRAY_REF)
+ return false;
+
+ if (!STMT_VINFO_DATA_REF (stmt_info))
+ return false;
+
+ /* Previous analysis steps have already verified that the data ref is
+ vectorizable (w.r.t data dependences, access pattern, etc). */
+
+ /* FORNOW: Not considering the cost. */
+
+ STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+
+ return true;
+}
+
+
+/* Function vect_is_supportable_assignment.
+
+ Verify that STMT performs an assignment, and can be vectorized. */
+
+static bool
+vect_is_supportable_assignment (tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree scalar_dest;
+ tree op;
+ struct loop *loop = STMT_VINFO_LOOP (stmt_info);
+
+ /* Is vectorizable assignment? */
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (scalar_dest) != SSA_NAME)
+ return false;
+
+ op = TREE_OPERAND (stmt, 1);
+
+ if (!vect_is_simple_use (op, loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "use not simple.\n");
+ return false;
+ }
+
+ STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+
+ return true;
+}
+
+
+/* Function vect_analyze_operations.
+
+ Scan the loop stmts and make sure they are all vectorizable. */
+
+static bool
+vect_analyze_operations (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+ block_stmt_iterator si;
+ int vectorization_factor = 0;
+ int i;
+ bool ok;
+ tree scalar_type;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
+
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ int nunits;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype;
+ dataflow_t df;
+ int j, num_uses;
+ vdef_optype vdefs;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n-------\nexamining statement:\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+
+ if (!stmt_info)
+ abort ();
+
+	  /* Skip stmts which do not need to be vectorized.
+	     This is expected to include:
+	     - the COND_EXPR which is the loop exit condition
+	     - any LABEL_EXPRs in the loop
+	     - computations that are used only for array indexing or loop
+	       control */
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "irrelevant.\n");
+ continue;
+ }
+
+	  /* FORNOW: Make sure that the def of this stmt is not used
+	     outside the loop. This restriction will be relaxed in the
+	     future. */
+ vdefs = STMT_VDEF_OPS (stmt);
+ if (!vdefs) /* CHECKME */
+ {
+ df = get_immediate_uses (stmt);
+ num_uses = num_immediate_uses (df);
+ for (j = 0; j < num_uses; j++)
+ {
+ tree use = immediate_use (df, j);
+ basic_block bb = bb_for_stmt (use);
+ if (!flow_bb_inside_loop_p (loop, bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "def used out of loop:\n");
+ print_generic_stmt (dump_file, use, TDF_SLIM);
+ }
+ return false;
+ }
+ }
+ }
+
+ if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "vector stmt in loop!\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+
+ if (STMT_VINFO_DATA_REF (stmt_info))
+ scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+ else
+ scalar_type = TREE_TYPE (stmt);
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vectype)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "no vectype for stmt.\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
+
+ ok = (vect_is_supportable_op (stmt)
+ || vect_is_supportable_assignment (stmt)
+ || vect_is_supportable_load (stmt)
+ || vect_is_supportable_store (stmt));
+
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "stmt not supported.\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+
+ nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "nunits = %d\n", nunits);
+
+ if (vectorization_factor)
+ {
+ /* FORNOW: don't allow mixed units.
+ This restriction will be relaxed in the future. */
+ if (nunits != vectorization_factor)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "mixed types unsupported.\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+ }
+ else
+ vectorization_factor = nunits;
+ }
+ }
+
+  /* TODO: Analyze cost. Decide whether it is worthwhile to vectorize. */
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+  /* FORNOW: handle only cases where the loop bound is divisible by the
+     vectorization factor. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "vectorization_factor = %d, niters = %d\n",
+ vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
+
+ if (vectorization_factor == 0
+ || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+		 "loop bound unknown or not divisible by %d\n",
+ vectorization_factor);
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function exist_non_indexing_operands_for_use_p
+
+ USE is one of the uses attached to STMT. Check if USE is
+ used in STMT for anything other than indexing an array. */
+
+static bool
+exist_non_indexing_operands_for_use_p (tree use, tree stmt)
+{
+ tree operand;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "exist_non_indexing_operands_for_use_p?:\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+
+ /* USE corresponds to some operand in STMT. If there is no data
+ reference in STMT, then any operand that corresponds to USE
+ is not indexing an array. */
+ if (!STMT_VINFO_DATA_REF (stmt_info))
+ return true;
+
+  /* STMT has a data_ref. FORNOW this means that it is of one of
+ the following forms:
+ -1- ARRAY_REF = var
+ -2- var = ARRAY_REF
+ (This should have been verified in analyze_data_refs).
+
+ 'var' in the second case corresponds to a def, not a use,
+ so USE cannot correspond to any operands that are not used
+ for array indexing.
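+
+     For example, in 'a[i] = x' the use 'x' is a non-indexing operand
+     (this function returns true for it), whereas the use 'i' is used
+     only to index the array (it returns false).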
+
+ Therefore, all we need to check is if STMT falls into the
+ first case, and whether var corresponds to USE. */
+
+ if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
+ return false;
+
+ operand = TREE_OPERAND (stmt, 1);
+
+ if (TREE_CODE (operand) != SSA_NAME)
+ return false;
+
+ if (operand == use)
+ return true;
+
+ return false;
+}
+
+
+/* Function vect_is_simple_iv_evolution.
+
+   FORNOW: A simple evolution of an induction variable in the loop is
+   considered a polynomial evolution with step 1.
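+
+   For example, the access function {0, +, 1}_1 (initial condition 0,
+   step 1) is considered simple, while {0, +, 2}_1 (step 2) is rejected
+   when STRICT is true. */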
+
+static bool
+vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
+ tree * step, bool strict)
+{
+ tree init_expr;
+ tree step_expr;
+
+ tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
+
+ /* When there is no evolution in this loop, the evolution function
+ is not "simple". */
+ if (evolution_part == NULL_TREE)
+ return false;
+
+ /* When the evolution is a polynomial of degree >= 2 or
+ exponential, the evolution function is not "simple". */
+ if (TREE_CODE (evolution_part) == POLYNOMIAL_CHREC
+ || TREE_CODE (evolution_part) == EXPONENTIAL_CHREC)
+ return false;
+
+ step_expr = evolution_part;
+ init_expr = initial_condition (access_fn);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nstep: ");
+ print_generic_expr (dump_file, step_expr, TDF_SLIM);
+ fprintf (dump_file, "\ninit: ");
+ print_generic_expr (dump_file, init_expr, TDF_SLIM);
+ fprintf (dump_file, "\n");
+ }
+
+ *init = init_expr;
+ *step = step_expr;
+
+ if (TREE_CODE (step_expr) != INTEGER_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nstep unknown.\n");
+ return false;
+ }
+
+ if (strict)
+ if (!integer_onep (step_expr))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_generic_expr (dump_file, step_expr, TDF_SLIM);
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function vect_analyze_scalar_cycles.
+
+ Examine the cross iteration def-use cycles of scalar variables, by
+ analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
+ cycles that they represent do not impede vectorization.
+
+   FORNOW: Reduction, as in the following loop, is not supported yet:
+ loop1:
+ for (i=0; i<N; i++)
+ sum += a[i];
+ The cross-iteration cycle corresponding to variable 'sum' will be
+ considered too complicated and will impede vectorization.
+
+   FORNOW: Induction, as in the following loop, is not supported yet:
+ loop2:
+ for (i=0; i<N; i++)
+ a[i] = i;
+
+ However, the following loop *is* vectorizable:
+ loop3:
+ for (i=0; i<N; i++)
+ a[i] = b[i];
+
+ In both loops there exists a def-use cycle for the variable i:
+ loop: i_2 = PHI (i_0, i_1)
+ a[i_2] = ...;
+ i_1 = i_2 + 1;
+ GOTO loop;
+
+   The evolution of the above cycle is considered simple enough;
+ however, we also check that the cycle does not need to be
+ vectorized, i.e - we check that the variable that this cycle
+ defines is only used for array indexing or in stmts that do not
+ need to be vectorized. This is not the case in loop2, but it
+ *is* the case in loop3. */
+
+static bool
+vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
+{
+ tree phi;
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block bb = loop->header;
+ tree dummy;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
+
+ for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+ {
+#if 0
+ int i;
+ int num_uses;
+ dataflow_t df;
+#endif
+ tree access_fn = NULL;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Analyze phi\n");
+ print_generic_expr (dump_file, phi, TDF_SLIM);
+ }
+
+      /* Skip virtual phis. The data dependences that are associated with
+ virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
+
+ /* CHECKME: correct way to check for a virtual phi? */
+
+ if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "virtual phi. skip.\n");
+ continue;
+ }
+
+ /* Analyze the evolution function. */
+
+ /* FORNOW: The only scalar cross-iteration cycles that we allow are
+ those of the loop induction variable;
+ Furthermore, if that induction variable is used in an operation
+	 that needs to be vectorized (i.e., is not solely used to index
+	 arrays and check the exit condition) - we do not support its
+	 vectorization yet. */
+
+ /* 1. Verify that it is an IV with a simple enough access pattern. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "analyze cycles: call monev analyzer!\n");
+
+ access_fn = instantiate_parameters
+ (loop,
+ analyze_scalar_evolution (loop, PHI_RESULT (phi)));
+
+ if (!access_fn)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "No access function.\n");
+ return false;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Access function of PHI: ");
+ print_generic_expr (dump_file, access_fn, TDF_SLIM);
+ }
+
+ if (!vect_is_simple_iv_evolution (loop_num (loop), access_fn, &dummy,
+ &dummy, false))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "unsupported cross iter cycle.\n");
+ return false;
+ }
+
+#if 0 /* following check is now performed in "vect_is_simple_use" */
+
+ /* 2. Verify that this variable is only used in stmts that do not need
+ to be vectorized.
+ FIXME: the following checks should be applied to other defs in
+ this def-use cycle (not just to the phi result). */
+
+ df = get_immediate_uses (phi);
+ num_uses = num_immediate_uses (df);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "num uses = %d\n", num_uses);
+ for (i = 0; i < num_uses; i++)
+ {
+ tree use = immediate_use (df, i);
+ stmt_vec_info stmt_info = vinfo_for_stmt (use);
+
+ if (!stmt_info)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+		  fprintf (dump_file, "\nused outside the loop??\n");
+ print_generic_expr (dump_file, use, TDF_SLIM);
+ }
+ return false;
+ }
+
+ if (STMT_VINFO_RELEVANT_P (stmt_info)
+ && exist_non_indexing_operands_for_use_p (PHI_RESULT (phi), use))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "\ninduction vectorization. Unsupported.\n");
+ print_generic_expr (dump_file, use, TDF_SLIM);
+ }
+ return false;
+ }
+ }
+
+#endif
+
+ }
+
+ return true;
+}
+
+
+/* Function get_array_base.
+
+ Return the base of the array_ref EXPR. */
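+
+/* For example, for the two-dimensional reference 'a[i][j]' (nested
+   ARRAY_REFs) the base that is returned is the decl 'a'.  */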
+
+static tree
+get_array_base (tree expr)
+{
+ tree expr1;
+ if (TREE_CODE (expr) != ARRAY_REF)
+ abort ();
+
+ expr1 = TREE_OPERAND (expr, 0);
+ while (TREE_CODE (expr1) == ARRAY_REF)
+ expr1 = TREE_OPERAND (expr1, 0);
+
+ return expr1;
+}
+
+
+/* Function vect_analyze_data_ref_dependence.
+
+   Return FALSE if there (might) exist a dependence between the
+   memory-references DRA and DRB that would prevent vectorization;
+   return TRUE otherwise.  */
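+
+/* For illustration (example stmts, not taken from the code above): for
+   the pair of references in
+     a[i] = b[i];
+   the bases 'a' and 'b' differ, so TRUE is returned; for
+     a[i] = a[i-1] + 1;
+   both references share the base 'a' and the dependence is not proven
+   independent, so FALSE is returned.  */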
+
+static bool
+vect_analyze_data_ref_dependence (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ /* FORNOW: use most trivial and conservative test. */
+
+  /* CHECKME: this test holds only if the array base is not a pointer.
+     This has been verified by vect_analyze_data_refs.
+     This restriction will be relaxed in the future. */
+
+ if (!array_base_name_differ_p (dra, drb))
+ {
+ enum data_dependence_direction ddd =
+ ddg_direction_between_stmts (DR_STMT (dra), DR_STMT (drb),
+ loop_num (loop_of_stmt (DR_STMT (dra))));
+
+ if (ddd == dir_independent)
+ return true;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "vect_analyze_data_ref_dependence: same base\n");
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function vect_analyze_data_ref_dependences.
+
+ Examine all the data references in the loop, and make sure there do not
+ exist any data dependences between them.
+
+ FORNOW: We do not construct a data dependence graph and try to deal with
+ dependences, but fail at the first data dependence that we
+ encounter.
+
+ FORNOW: We only handle array references.
+
+   FORNOW: We apply a trivial conservative dependence test.
+
+   Note that read-read pairs are not examined - two loads from the same
+   location cannot create a dependence. */
+
+static bool
+vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
+{
+ unsigned int i, j;
+ varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+ varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+
+ /* examine store-store (output) dependences */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "compare all store-store pairs\n");
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
+ {
+ for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
+ {
+ struct data_reference *dra =
+ VARRAY_GENERIC_PTR (loop_write_refs, i);
+ struct data_reference *drb =
+ VARRAY_GENERIC_PTR (loop_write_refs, j);
+ bool ok = vect_analyze_data_ref_dependence (dra, drb);
+ if (!ok)
+ return false;
+ }
+ }
+
+ /* examine load-store (true/anti) dependences */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "compare all load-store pairs\n");
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
+ {
+ for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
+ {
+ struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
+ struct data_reference *drb =
+ VARRAY_GENERIC_PTR (loop_write_refs, j);
+ bool ok = vect_analyze_data_ref_dependence (dra, drb);
+ if (!ok)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+/* Function vect_get_array_first_index.
+
+   REF is an array reference.  Find the lower bound of the array dimension
+   and return it in ARRAY_FIRST_INDEX (e.g., 0 in C arrays, 1 in Fortran
+   arrays, unless defined otherwise.  At the moment, gfortran arrays are
+   represented with a pointer which points to one element lower than the
+   array base, so ARRAY_FIRST_INDEX is currently 0 also for Fortran
+   arrays).
+   Return TRUE if such a lower bound was found, and FALSE otherwise. */
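+
+/* For illustration (a hypothetical declaration): for 'int a[10]' the
+   domain of the array type is [0, 9], so TYPE_MIN_VALUE is the
+   INTEGER_CST 0 and ARRAY_FIRST_INDEX is set to 0.  */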
+
+static bool
+vect_get_array_first_index (tree ref, int *array_first_index)
+{
+ tree array_start;
+ tree array_base_type;
+ int array_start_val;
+
+ array_base_type = TREE_TYPE (TREE_OPERAND (ref, 0));
+ if (! TYPE_DOMAIN (array_base_type))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "no domain for array base type\n");
+ print_generic_expr (dump_file, array_base_type, TDF_DETAILS);
+ }
+ return false;
+ }
+
+ array_start = TYPE_MIN_VALUE (TYPE_DOMAIN (array_base_type));
+ if (TREE_CODE (array_start) != INTEGER_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "array min val not integer cst\n");
+ print_generic_expr (dump_file, array_start, TDF_DETAILS);
+ }
+ return false;
+ }
+
+ if (TREE_INT_CST_HIGH (array_start) != 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "array min val CST_HIGH != 0\n");
+ return false;
+ }
+
+ array_start_val = TREE_INT_CST_LOW (array_start);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_generic_expr (dump_file, array_start, TDF_DETAILS);
+ fprintf (dump_file, "\narray min val = %d\n", array_start_val);
+ }
+
+ *array_first_index = array_start_val;
+
+ return true;
+}
+
+
+/* Function vect_compute_data_ref_alignment
+
+   Compute the misalignment of the data reference DR.
+
+   FORNOW: No analysis is actually performed.  Misalignment is calculated
+   only for trivial cases.  TODO. */
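+
+/* For illustration (a hypothetical example): assuming a vector type with
+   8 units (e.g. V8HI), the access 'a[i+3]' has the access function
+   {3, +, 1}, so 'init_val' is 3; with an array lower bound of 0, the
+   recorded misalignment is (3 - 0) % 8 = 3.  */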
+
+static void
+vect_compute_data_ref_alignment (struct data_reference *dr,
+ loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
+{
+ tree stmt = DR_STMT (dr);
+ tree ref = DR_REF (dr);
+ tree vectype;
+ tree access_fn = DR_ACCESS_FN (dr, 0); /* CHECKME */
+ tree init;
+ int init_val;
+ tree scalar_type;
+ int misalign;
+ int array_start_val;
+ bool ok;
+
+ /* Initialize misalignment to unknown. */
+ DR_MISALIGNMENT (dr) = -1;
+
+
+  /* In the special case of an array whose alignment can be forced, we may
+     be able to compute more precise information. */
+
+ if (!vect_force_dr_alignment_p (dr))
+ return;
+
+ init = initial_condition (access_fn);
+
+  /* FORNOW: In order to simplify the handling of alignment, we make sure
+     that the first location at which the array is accessed ('init') is on an
+     'NUNITS' boundary, since we assume here that the 'array base' itself
+     is aligned.  This is too conservative, since we require that
+     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
+     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
+     This should be relaxed in the future. */
+
+  if (!init || TREE_CODE (init) != INTEGER_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "init not INTEGER_CST\n");
+ return;
+ }
+
+ /* CHECKME */
+ if (TREE_INT_CST_HIGH (init) != 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "init CST_HIGH != 0\n");
+ return;
+ }
+
+ init_val = TREE_INT_CST_LOW (init);
+
+ scalar_type = TREE_TYPE (ref);
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vectype)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "no vectype for stmt: ");
+ print_generic_expr (dump_file, stmt, TDF_SLIM);
+ fprintf (dump_file, "\nscalar_type: ");
+ print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
+ fprintf (dump_file, "\n");
+ }
+ return;
+ }
+
+ ok = vect_get_array_first_index (ref, &array_start_val);
+ if (!ok)
+ return;
+
+ misalign = (init_val - array_start_val) %
+ GET_MODE_NUNITS (TYPE_MODE (vectype));
+
+ DR_MISALIGNMENT (dr) = misalign;
+
+ return;
+}
+
+
+/* Function vect_compute_data_refs_alignment
+
+   Compute the misalignment of data references in the loop.
+   This pass may take place at function granularity instead of at loop
+   granularity.
+
+   FORNOW: No analysis is actually performed.  Misalignment is calculated
+   only for trivial cases.  TODO. */
+
+static void
+vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
+{
+ varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+ varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+ unsigned int i;
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ vect_compute_data_ref_alignment (dr, loop_vinfo);
+ }
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+ vect_compute_data_ref_alignment (dr, loop_vinfo);
+ }
+
+ return;
+}
+
+
+/* Function vect_enhance_data_refs_alignment
+
+ This pass will use loop versioning and loop peeling in order to enhance
+ the alignment of data references in the loop.
+
+   FORNOW: we assume that whatever versioning/peeling takes place, only the
+   original loop is to be vectorized; any other loops that are created by
+   the transformations performed in this pass are not supposed to be
+   vectorized.  This restriction will be relaxed.
+
+   FORNOW: No transformation is actually performed.  TODO. */
+
+static void
+vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
+{
+ /*
+ This pass will require a cost model to guide it whether to apply peeling
+   or versioning or a combination of the two.  For example, the scheme that
+   Intel uses when given a loop with several memory accesses is as follows:
+   choose one memory access ('p') whose alignment you want to force by
+   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
+ other accesses are not necessarily aligned, or (2) use loop versioning to
+ generate one loop in which all accesses are aligned, and another loop in
+ which only 'p' is necessarily aligned.
+
+ ("Automatic Intra-Register Vectorization for the Intel Architecture",
+   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
+ Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
+
+ Devising a cost model is the most critical aspect of this work. It will
+ guide us on which access to peel for, whether to use loop versioning, how
+ many versions to create, etc. The cost model will probably consist of
+ generic considerations as well as target specific considerations (on
+ powerpc for example, misaligned stores are more painful than misaligned
+ loads).
+
+   Here are the general steps involved in alignment enhancement:
+
+ -- original loop, before alignment analysis:
+ for (i=0; i<N; i++){
+ x = q[i]; # DR_MISALIGNMENT(q) = unknown
+ p[i] = y; # DR_MISALIGNMENT(p) = unknown
+ }
+
+ -- After vect_compute_data_refs_alignment:
+ for (i=0; i<N; i++){
+ x = q[i]; # DR_MISALIGNMENT(q) = 3
+ p[i] = y; # DR_MISALIGNMENT(p) = unknown
+ }
+
+ -- Possibility 1: we do loop versioning:
+ if (p is aligned) {
+ for (i=0; i<N; i++){ # loop 1A
+ x = q[i]; # DR_MISALIGNMENT(q) = 3
+ p[i] = y; # DR_MISALIGNMENT(p) = 0
+ }
+ }
+ else {
+ for (i=0; i<N; i++){ # loop 1B
+ x = q[i]; # DR_MISALIGNMENT(q) = 3
+ p[i] = y; # DR_MISALIGNMENT(p) = unaligned
+ }
+ }
+
+ -- Possibility 2: we do loop peeling:
+ for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
+ x = q[i];
+ p[i] = y;
+ }
+ for (i = 3; i < N; i++){ # loop 2A
+ x = q[i]; # DR_MISALIGNMENT(q) = 0
+ p[i] = y; # DR_MISALIGNMENT(p) = unknown
+ }
+
+ -- Possibility 3: combination of loop peeling and versioning:
+ for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
+ x = q[i];
+ p[i] = y;
+ }
+ if (p is aligned) {
+ for (i = 3; i<N; i++){ # loop 3A
+ x = q[i]; # DR_MISALIGNMENT(q) = 0
+ p[i] = y; # DR_MISALIGNMENT(p) = 0
+ }
+ }
+ else {
+ for (i = 3; i<N; i++){ # loop 3B
+ x = q[i]; # DR_MISALIGNMENT(q) = 0
+ p[i] = y; # DR_MISALIGNMENT(p) = unaligned
+ }
+ }
+
+ These loops are later passed to loop_transform to be vectorized. The
+ vectorizer will use the alignment information to guide the transformation
+ (whether to generate regular loads/stores, or with special handling for
+ misalignment).
+ */
+
+ return;
+}
+
+
+/* Function vect_analyze_data_refs_alignment
+
+   Analyze the alignment of the data-references in the loop.
+   FORNOW: Until support for misaligned accesses is in place, the loop can
+   be vectorized only if all accesses are aligned.  This restriction will
+   be relaxed. */
+
+static bool
+vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
+{
+ varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+ varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+ unsigned int i;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
+
+
+ /* This pass may take place at function granularity instead of at loop
+ granularity. */
+
+ vect_compute_data_refs_alignment (loop_vinfo);
+
+
+ /* This pass will use loop versioning and loop peeling in order to enhance
+ the alignment of data references in the loop.
+     FORNOW: we assume that whatever versioning/peeling took place, the
+     original loop is to be vectorized.  Any other loops that were created by
+     the transformations performed in this pass are not supposed to be
+     vectorized.  This restriction will be relaxed. */
+
+ vect_enhance_data_refs_alignment (loop_vinfo);
+
+
+  /* Finally, check that the loop can be vectorized.
+     FORNOW: Until support for misaligned accesses is in place, the loop can
+     be vectorized only if all accesses are aligned.  This restriction will
+     be relaxed. */
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ if (!aligned_access_p (dr))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "first access not aligned.\n");
+ return false;
+ }
+ }
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+ if (!aligned_access_p (dr))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "first access not aligned.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+/* Function vect_analyze_data_ref_access.
+
+   Analyze the access pattern of the data-reference DR.  For now, a data
+   access has to be consecutive and aligned to be considered vectorizable. */
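+
+/* For illustration (hypothetical references, not taken from the code
+   above): the reference 'a[i]' has the access function {0, +, 1} and is
+   accepted; 'a[2*i]' ({0, +, 2}) and 'a[n*i]' ({0, +, n}) are rejected
+   as too complicated.  */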
+
+static bool
+vect_analyze_data_ref_access (struct data_reference *dr)
+{
+ varray_type access_fns = DR_ACCESS_FNS (dr);
+ tree access_fn;
+ tree init, step;
+
+ /* FORNOW: handle only one dimensional arrays.
+ This restriction will be relaxed in the future. */
+ if (VARRAY_ACTIVE_SIZE (access_fns) != 1)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "multi dimensional array reference.\n");
+ return false;
+ }
+ access_fn = DR_ACCESS_FN (dr, 0);
+
+ if (!vect_is_simple_iv_evolution (loop_num (loop_of_stmt (DR_STMT (dr))),
+ access_fn, &init, &step, true))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "too complicated access function\n");
+ print_generic_expr (dump_file, access_fn, TDF_SLIM);
+ }
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function vect_analyze_data_ref_accesses.
+
+ Analyze the access pattern of all the data references in the loop.
+
+ FORNOW: the only access pattern that is considered vectorizable is a
+ simple step 1 (consecutive) access.
+
+ FORNOW: handle only one dimensional arrays. */
+
+static bool
+vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+ varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ bool ok = vect_analyze_data_ref_access (dr);
+ if (!ok)
+ return false;
+ }
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+ bool ok = vect_analyze_data_ref_access (dr);
+ if (!ok)
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Function vect_analyze_data_refs.
+
+   Find all the data references in the loop.
+
+   FORNOW: Handle only one dimensional ARRAY_REFs whose base is really an
+	   array (not a pointer) and whose alignment can be forced.  This
+	   restriction will be relaxed. */
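+
+/* For illustration (example stmts, not taken from the code above):
+   'a[i] = x', with 'a' a one-dimensional array, yields a data-ref that
+   is recorded; '*p = x' fails as an unhandled non-array data ref, and
+   'a[i][j] = x' fails as a multi-dimensional array access.  */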
+
+static bool
+vect_analyze_data_refs (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+ block_stmt_iterator si;
+ int j;
+ struct data_reference *dr;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
+
+ for (j = 0; j < nbbs; j++)
+ {
+ basic_block bb = bbs[j];
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ vdef_optype vdefs = STMT_VDEF_OPS (stmt);
+ vuse_optype vuses = STMT_VUSE_OPS (stmt);
+ varray_type *datarefs = NULL;
+ int nvuses = 0, nvdefs = 0;
+ tree ref = NULL;
+ tree array_base;
+
+	  /* CHECKME: Relying on the fact that a data-ref exists in a stmt
+	     if and only if the stmt has vuses/vdefs. */
+
+ if (!vuses && !vdefs)
+ continue;
+
+ if (vuses)
+ nvuses = NUM_VUSES (vuses);
+ if (vdefs)
+ nvdefs = NUM_VDEFS (vdefs);
+
+ if (nvuses + nvdefs != 1)
+ {
+	      /* CHECKME: multiple vdefs/vuses in a GIMPLE stmt are
+	         assumed to indicate a non-vectorizable stmt (e.g., ASM,
+	         CALL_EXPR) or the presence of an aliasing problem.  The
+	         first case is ruled out during vect_analyze_operations;
+	         as for the second case, currently the vuses/vdefs are
+	         meaningless as they are too conservative.  We therefore
+	         ignore them. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Warning: multiple vops!\n");
+ print_generic_stmt (dump_file, stmt,
+ ~(TDF_RAW | TDF_SLIM | TDF_LINENO));
+ }
+ }
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ {
+ /* CHECKME: a vdef/vuse in a GIMPLE stmt is assumed to
+ appear only in a MODIFY_EXPR. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "unexpected vops in stmt\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+
+ if (vuses)
+ {
+ if (TREE_CODE (TREE_OPERAND (stmt, 1)) == ARRAY_REF)
+ {
+ ref = TREE_OPERAND (stmt, 1);
+ datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
+ }
+ }
+
+ if (vdefs)
+ {
+ if (TREE_CODE (TREE_OPERAND (stmt, 0)) == ARRAY_REF)
+ {
+ ref = TREE_OPERAND (stmt, 0);
+ datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
+ }
+ }
+
+ if (!ref)
+ {
+ /* A different type of data reference (pointer?, struct?)
+ FORNOW: Do not attempt to handle. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "unhandled non-array data ref\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+
+ dr = analyze_array (stmt, ref);
+
+ array_base = TREE_OPERAND (ref, 0);
+
+ /* FORNOW: make sure that the array is one dimensional.
+ This restriction will be relaxed in the future. */
+ if (TREE_CODE (array_base) == ARRAY_REF)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "unhandled 2D-array data ref\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+
+ VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
+ STMT_VINFO_DATA_REF (stmt_info) = dr;
+ }
+ }
+
+ return true;
+}
+
+
+/* Utility functions used by vect_mark_stmts_to_be_vectorized.
+ Implementation inspired by tree-ssa-dce.c. */
+
+/* Function vect_mark_relevant.
+
+ Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
+
+static void
+vect_mark_relevant (varray_type worklist, tree stmt)
+{
+ stmt_vec_info stmt_info;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "mark relevant.\n");
+
+ if (TREE_CODE (stmt) == PHI_NODE)
+ {
+ VARRAY_PUSH_TREE (worklist, stmt);
+ return;
+ }
+
+ stmt_info = vinfo_for_stmt (stmt);
+
+ if (!stmt_info)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "mark relevant: no stmt info!!\n");
+ print_generic_expr (dump_file, stmt, TDF_SLIM);
+ }
+ return;
+ }
+
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "already marked relevant.\n");
+ return;
+ }
+
+ STMT_VINFO_RELEVANT_P (stmt_info) = 1;
+ VARRAY_PUSH_TREE (worklist, stmt);
+}
+
+
+/* Function vect_stmt_relevant_p.
+
+   Return true if STMT, in the loop that is represented by LOOP_VINFO,
+   is "relevant for vectorization".
+
+   A stmt is considered "relevant for vectorization" if:
+   - it has uses outside the loop.
+   - it has vdefs (it alters memory).
+   - it is a control stmt in the loop (other than the exit condition).
+
+   CHECKME: what other side effects would the vectorizer allow? */
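+
+/* For illustration (an example loop, not taken from the code above): in
+     for (i=0; i<N; i++)
+       a[i] = b[i];
+   the store to 'a[i]' has a vdef and is therefore relevant; the
+   increment of 'i' is not relevant by itself - it would only be marked
+   later, by vect_mark_stmts_to_be_vectorized, if a relevant stmt used
+   it outside of an array-indexing context.  */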
+
+static bool
+vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
+{
+ vdef_optype vdefs;
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ int i;
+ dataflow_t df;
+ int num_uses;
+
+ /* cond stmt other than loop exit cond. */
+ if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
+ return true;
+
+ /* changing memory. */
+ vdefs = STMT_VDEF_OPS (stmt);
+ if (vdefs)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "vect_stmt_relevant_p: stmt has vdefs:\n");
+ return true;
+ }
+
+ /* uses outside the loop. */
+ df = get_immediate_uses (stmt);
+ num_uses = num_immediate_uses (df);
+ for (i = 0; i < num_uses; i++)
+ {
+ tree use = immediate_use (df, i);
+ basic_block bb = bb_for_stmt (use);
+ if (!flow_bb_inside_loop_p (loop, bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+		     "vect_stmt_relevant_p: used out of loop:\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+/* Function vect_mark_stmts_to_be_vectorized.
+
+ Not all stmts in the loop need to be vectorized. For example:
+
+ for i...
+ for j...
+ 1. T0 = i + j
+ 2. T1 = a[T0]
+
+ 3. j = j + 1
+
+   Stmts 1 and 3 do not need to be vectorized, because loop control and
+ addressing of vectorized data-refs are handled differently.
+
+ This pass detects such stmts. */
+
+static bool
+vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
+{
+ varray_type worklist;
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ unsigned int nbbs = loop->num_nodes;
+ block_stmt_iterator si;
+ tree stmt;
+ stmt_ann_t ann;
+ unsigned int i;
+ int j;
+ use_optype use_ops;
+ stmt_vec_info stmt_info;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
+
+ VARRAY_TREE_INIT (worklist, 64, "work list");
+
+ /* 1. Init worklist. */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ stmt = bsi_stmt (si);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "init: stmt relevant?\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+
+ stmt_info = vinfo_for_stmt (stmt);
+ STMT_VINFO_RELEVANT_P (stmt_info) = 0;
+
+ if (vect_stmt_relevant_p (stmt, loop_vinfo))
+ vect_mark_relevant (worklist, stmt);
+ }
+ }
+
+
+ /* 2. Process_worklist */
+
+ while (VARRAY_ACTIVE_SIZE (worklist) > 0)
+ {
+ stmt = VARRAY_TOP_TREE (worklist);
+ VARRAY_POP (worklist);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "worklist: examine stmt:\n");
+ print_generic_stmt (dump_file, stmt, TDF_SLIM);
+ }
+
+ /* Examine the USES in this statement. Mark all the statements which
+ feed this statement's uses as "relevant", unless the USE is used as
+ an array index. */
+
+ if (TREE_CODE (stmt) == PHI_NODE)
+ {
+ /* follow the def-use chain inside the loop. */
+ for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
+ {
+ tree arg = PHI_ARG_DEF (stmt, j);
+ if (TREE_CODE (arg) == SSA_NAME)
+ {
+		  tree def_stmt = SSA_NAME_DEF_STMT (arg);
+		  basic_block bb;
+
+		  if (def_stmt == NULL_TREE)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nworklist: no def_stmt!\n");
+ varray_clear (worklist);
+ return false;
+ }
+
+ if (TREE_CODE (def_stmt) == NOP_EXPR)
+ {
+ tree arg = TREE_OPERAND (def_stmt, 0);
+ if (TREE_CODE (arg) != INTEGER_CST
+ && TREE_CODE (arg) != REAL_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nworklist: NOP def_stmt?\n");
+ varray_clear (worklist);
+ return false;
+ }
+ continue;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nworklist: def_stmt:\n");
+ print_generic_expr (dump_file, def_stmt, TDF_SLIM);
+ }
+
+ bb = bb_for_stmt (def_stmt);
+ if (flow_bb_inside_loop_p (loop, bb))
+ vect_mark_relevant (worklist, def_stmt);
+ }
+ }
+
+ continue;
+ }
+
+ ann = stmt_ann (stmt);
+ use_ops = USE_OPS (ann);
+
+ for (i = 0; i < NUM_USES (use_ops); i++)
+ {
+ tree use = USE_OP (use_ops, i);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nworklist: examine use %d:\n", i);
+ print_generic_expr (dump_file, use, TDF_SLIM);
+ }
+
+ if (exist_non_indexing_operands_for_use_p (use, stmt))
+ {
+ tree def_stmt = NULL_TREE;
+ basic_block bb;
+
+ if (TREE_CODE (use) == SSA_NAME)
+ def_stmt = SSA_NAME_DEF_STMT (use);
+
+ if (def_stmt == NULL_TREE)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nworklist: no def_stmt!\n");
+ varray_clear (worklist);
+ return false;
+ }
+
+ if (TREE_CODE (def_stmt) == NOP_EXPR)
+ {
+ tree arg = TREE_OPERAND (def_stmt, 0);
+ if (TREE_CODE (arg) != INTEGER_CST
+ && TREE_CODE (arg) != REAL_CST)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nworklist: NOP def_stmt?\n");
+ varray_clear (worklist);
+ return false;
+ }
+ continue;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nworklist: def_stmt:\n");
+ print_generic_expr (dump_file, def_stmt, TDF_SLIM);
+ }
+
+ bb = bb_for_stmt (def_stmt);
+ if (flow_bb_inside_loop_p (loop, bb))
+ vect_mark_relevant (worklist, def_stmt);
+ }
+ }
+
+ } /* while worklist */
+
+ varray_clear (worklist);
+ return true;
+}
+
+
+/* Function vect_get_loop_niters.
+
+   Determine how many iterations the loop executes; if that number is a
+   compile-time constant, store it in NUMBER_OF_ITERATIONS.  Return the
+   loop exit condition.  */
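+
+/* For illustration: when the loop bound is a compile-time constant, as in
+     for (i=0; i<16; i++) ...
+   'niters' is an INTEGER_CST and *NUMBER_OF_ITERATIONS is set from it;
+   for a variable bound 'n', 'niters' is not constant, the value of
+   *NUMBER_OF_ITERATIONS is left unchanged, and the caller gives up.  */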
+
+static tree
+vect_get_loop_niters (struct loop *loop, int *number_of_iterations)
+{
+ tree niters;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<get_loop_niters>>\n");
+
+ niters = number_of_iterations_in_loop (loop);
+
+ if (niters != NULL_TREE
+ && TREE_CODE (niters) == INTEGER_CST)
+ {
+ *number_of_iterations = TREE_INT_CST_LOW (niters);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "get_loop_niters: %d.\n",
+ *number_of_iterations);
+ }
+
+ return get_loop_exit_condition (loop);
+}
+
+
+/* Function vect_analyze_loop_form.
+
+   Verify the following restrictions (some of these may be relaxed in the
+   future):
+
+   - it's an inner-most loop
+   - number of BBs = 2 (which are the loop header and the latch)
+   - the loop has a pre-header
+   - the loop has a single entry and exit
+   - the loop exit condition is simple enough */
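+
+/* For illustration (an example loop, not taken from the code above):
+   a canonicalized loop such as
+     for (i=0; i<N; i++)
+       a[i] = b[i];
+   has exactly two basic blocks (the header and the latch), a pre-header,
+   and a single entry and exit, and therefore passes these checks.  */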
+
+static loop_vec_info
+vect_analyze_loop_form (struct loop *loop)
+{
+ loop_vec_info loop_vinfo;
+ tree loop_cond;
+ int number_of_iterations = -1;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
+
+ if (loop->level > 1 /* FORNOW: inner-most loop (CHECKME) */
+ || loop->num_exits > 1 || loop->num_entries > 1 || loop->num_nodes != 2
+ || !loop->pre_header || !loop->header || !loop->latch)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "loop_analyzer: bad loop form (entry/exit, nbbs, level...)\n");
+ flow_loop_dump (loop, dump_file, NULL, 1);
+ }
+
+ return NULL;
+ }
+
+ loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
+ if (!loop_cond)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Complicated exit condition.\n");
+ return NULL;
+ }
+
+ if (number_of_iterations < 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Can't determine num iters.\n");
+ return NULL;
+ }
+
+ /* CHECKME: check monev analyzer. */
+ if (number_of_iterations == 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "0 iterations??\n");
+ return NULL;
+ }
+
+ loop_vinfo = new_loop_vec_info (loop);
+
+ LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
+ LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+
+ return loop_vinfo;
+}
+
+
+/* Function vect_analyze_loop.
+
+ Apply a set of analyses on LOOP, and create a loop_vec_info struct
+ for it. The different analyses will record information in the
+ loop_vec_info struct. */
+
+static loop_vec_info
+vect_analyze_loop (struct loop *loop)
+{
+ bool ok;
+ loop_vec_info loop_vinfo;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n\n\n<<<<<<< analyze_loop_nest >>>>>>>\n");
+
+  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
+
+ loop_vinfo = vect_analyze_loop_form (loop);
+ if (!loop_vinfo)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad loop form.\n");
+ return NULL;
+ }
+
+ /* Find all data references in the loop (which correspond to vdefs/vuses)
+ and analyze their evolution in the loop.
+
+ FORNOW: Handle only simple, one-dimensional, array references, which
+ alignment can be forced. */
+
+ ok = vect_analyze_data_refs (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad data references.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Data-flow analysis to detect stmts that do not need to be vectorized. */
+
+ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: unexpected pattern.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Check that all cross-iteration scalar data-flow cycles are OK.
+ Cross-iteration cycles caused by virtual phis are analyzed separately. */
+
+ ok = vect_analyze_scalar_cycles (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad scalar cycle.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Analyze data dependences between the data-refs in the loop.
+ FORNOW: We do not construct a data dependence graph and try to deal
+ with dependences, but fail at the first data dependence that
+ we encounter. */
+
+ ok = vect_analyze_data_ref_dependences (loop_vinfo);
+
+ /* TODO: May want to generate run time pointer aliasing checks and
+ loop versioning. */
+
+ /* TODO: May want to perform loop transformations to break dependence
+ cycles. */
+
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad data dependence.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Analyze the access patterns of the data-refs in the loop (consecutive,
+ complex, etc.). FORNOW: Only handle consecutive access pattern. */
+
+ ok = vect_analyze_data_ref_accesses (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad data access.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Analyze the alignment of the data-refs in the loop.
+ FORNOW: Only aligned accesses are handled. */
+
+ ok = vect_analyze_data_refs_alignment (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad data alignment.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+
+ /* Scan all the operations in the loop and make sure they are
+ vectorizable. */
+
+ ok = vect_analyze_operations (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "loop_analyzer: bad operations.\n");
+ destroy_loop_vec_info (loop_vinfo);
+ return NULL;
+ }
+
+ /* TODO: May want to collapse conditional code and loop versioning. */
+
+ /* TODO: Alignment: May want to perform loop peeling and/or run time
+ tests and loop versioning. */
+
+ LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
+
+ return loop_vinfo;
+}
+
+
+/* Function indicating whether we ought to include information for 'var'
+ when calculating immediate uses. For this pass we only want use
+ information for non-virtual variables. */
+
+static bool
+need_imm_uses_for (tree var)
+{
+ return is_gimple_reg (var);
+}
+
+
+/* Function vectorize_loops.
+ Entry Point to loop vectorization phase. */
+
+void
+vectorize_loops (struct loops *loops)
+{
+ unsigned int i;
+ unsigned int num_vectorized_loops = 0;
+
+ /* Does the target support SIMD? */
+ /* FORNOW: until more sophisticated machine modelling is in place. */
+ if (!UNITS_PER_SIMD_WORD)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "vectorizer: target vector size is not defined.\n");
+ return;
+ }
+
+ compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
+
+ /* ----------- Analyze loops. ----------- */
+  /* CHECKME: start from 1, since loops->parray[0] is the root of the
+     loop tree, representing the whole function.  */
+ for (i = 1; i < loops->num; i++)
+ {
+ loop_vec_info loop_vinfo;
+ struct loop *loop = loops->parray[i];
+
+ flow_loop_scan (loop, LOOP_ALL);
+
+ loop_vinfo = vect_analyze_loop (loop);
+ loop->aux = loop_vinfo;
+
+#ifndef ANALYZE_ALL_THEN_VECTORIZE_ALL
+ if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
+ continue;
+
+ vect_transform_loop (loop_vinfo);
+ num_vectorized_loops++;
+#endif
+ }
+
+#ifdef ANALYZE_ALL_THEN_VECTORIZE_ALL
+ for (i = 1; i < loops->num; i++)
+ {
+ struct loop *loop = loops->parray[i];
+ loop_vec_info loop_vinfo = loop->aux;
+
+ if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
+ continue;
+
+ vect_transform_loop (loop_vinfo);
+ num_vectorized_loops++;
+ }
+#endif
+
+ if (dump_file && (dump_flags & TDF_STATS))
+ fprintf (dump_file, "vectorized %u loops in function.\n",
+ num_vectorized_loops);
+
+  /* ----------- Finalize. ----------- */
+
+ free_df ();
+ for (i = 1; i < loops->num; i++)
+ {
+ struct loop *loop = loops->parray[i];
+ loop_vec_info loop_vinfo = loop->aux;
+ destroy_loop_vec_info (loop_vinfo);
+ loop->aux = NULL;
+ }
+}