aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author J"orn Rennecke <joern.rennecke@st.com> 2009-06-04 17:48:38 +0000
committer J"orn Rennecke <joern.rennecke@st.com> 2009-06-04 17:48:38 +0000
commit a3842acd872541a0f94709bb0673d64b81d525f9 (patch)
tree af9ac765e78883a0af71490a5b493efabf17f870
parent 2a9ff8bf117b1d9cdbefb32acaafa46223de7850 (diff)
branch arc-milepost-branch
* tree-parloops.c (parallelize_loops): Don't check for vector phi nodes when processing a loop that should be executed on a different target than CFUN.
* tree-vectorizer.c (vectorize_loops): Also analyze and vectorize loops for different targets than CFUN.
* tree-vect-transform.c (vect_transform_loop): If the loop is for a different target than CFUN, don't do alignment peeling. Switch to loop target during transformation and back afterwards.
* cfgloop.c (alloc_loop): Initialize target_arch member from CFUN.
* tree-ssa-loop.c (pass_vectorize): Add TODO_rebuild_alias to todo_flags_finish.
* tree-vect-transform.c (vect_create_addr_base_for_vector_ref): New parameter ptr_alias_set. Changed all callers.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-milepost-branch@148179 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog.multi-target 18
-rw-r--r-- gcc/cfgloop.c 1
-rw-r--r-- gcc/tree-parloops.c 6
-rw-r--r-- gcc/tree-ssa-loop.c 2
-rw-r--r-- gcc/tree-vect-transform.c 35
-rw-r--r-- gcc/tree-vectorizer.c 39
6 files changed, 75 insertions, 26 deletions
diff --git a/gcc/ChangeLog.multi-target b/gcc/ChangeLog.multi-target
index 8b297811280..515fc6b7f27 100644
--- a/gcc/ChangeLog.multi-target
+++ b/gcc/ChangeLog.multi-target
@@ -1,3 +1,21 @@
+2009-06-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * tree-parloops.c (parallelize_loops): Don't check for vector phi
+ nodes when processing a loop that should be executed on a different
+ target than CFUN.
+ * tree-vectorizer.c (vectorize_loops): Also analyze and vectorize
+ loops for different targets than CFUN.
+ * tree-vect-transform.c (vect_transform_loop): If the loop is for a
+ different target than CFUN, don't do alignment peeling. Switch
+ to loop target during transformation and back afterwards.
+
+ * cfgloop.c (alloc_loop): Initialize target_arch member from CFUN.
+
+ * tree-ssa-loop.c (pass_vectorize): Add TODO_rebuild_alias to
+ todo_flags_finish.
+ * tree-vect-transform.c (vect_create_addr_base_for_vector_ref): New
+ parameter ptr_alias_set. Changed all callers.
+
2009-06-01 J"orn Rennecke <joern.rennecke@arc.com>
* target.h (struct gcc_target): Add member ptr_mode.
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index e74284e8988..c1d096420cc 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -338,6 +338,7 @@ alloc_loop (void)
loop->exits = GGC_CNEW (struct loop_exit);
loop->exits->next = loop->exits->prev = loop->exits;
+ loop->target_arch = cfun->target_arch;
return loop;
}
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 6c24a03bac9..8d4b3a5524c 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1887,12 +1887,12 @@ parallelize_loops (void)
/* And of course, the loop must be parallelizable. */
|| !can_duplicate_loop_p (loop)
|| loop_has_blocks_with_irreducible_flag (loop)
- /* FIXME: the check for vector phi nodes could be removed. */
- || loop_has_vector_phi_nodes (loop)
|| (loop->target_arch != cfun->target_arch
? !number_of_iterations_exit (loop, single_dom_exit (loop),
&niter_desc, false)
- : (flag_tree_parallelize_loops <= 1
+ /* FIXME: the check for vector phi nodes could be removed. */
+ : (loop_has_vector_phi_nodes (loop)
+ || flag_tree_parallelize_loops <= 1
|| !loop_parallel_p (loop, reduction_list, &niter_desc))))
continue;
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 6fb1dad0987..b80c288c0dc 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -243,7 +243,7 @@ struct gimple_opt_pass pass_vectorize =
0, /* properties_provided */
0, /* properties_destroyed */
TODO_verify_loops, /* todo_flags_start */
- TODO_dump_func | TODO_update_ssa
+ TODO_dump_func | TODO_update_ssa | TODO_rebuild_alias
| TODO_ggc_collect /* todo_flags_finish */
}
};
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 13f152ba191..08035a3b8ec 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -53,7 +53,7 @@ static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
(gimple, struct loop*, tree, tree *, gimple *, bool, bool *, tree);
static tree vect_create_addr_base_for_vector_ref
- (gimple, gimple_seq *, tree, struct loop *);
+ (gimple, gimple_seq *, tree, struct loop *, alias_set_type);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, gimple, tree *);
static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
@@ -875,7 +875,8 @@ static tree
vect_create_addr_base_for_vector_ref (gimple stmt,
gimple_seq *new_stmt_list,
tree offset,
- struct loop *loop)
+ struct loop *loop,
+ alias_set_type ptr_alias_set)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
@@ -944,10 +945,18 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
/* addr_expr = addr_base */
addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
get_name (base_name));
+ if (ptr_alias_set)
+ DECL_POINTER_ALIAS_SET (addr_expr) = ptr_alias_set;
+ /* FIXME: as addr_expr has no memory tag, alias analysis thinks it
+ 'points-to anything'. */
add_referenced_var (addr_expr);
vec_stmt = fold_convert (vect_ptr_type, addr_base);
addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
get_name (base_name));
+ if (ptr_alias_set)
+ DECL_POINTER_ALIAS_SET (addr_expr2) = ptr_alias_set;
+ /* FIXME: as addr_expr2 has no memory tag, alias analysis thinks it
+ 'points-to anything'. */
add_referenced_var (addr_expr2);
vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
gimple_seq_add_seq (new_stmt_list, seq);
@@ -1034,6 +1043,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
tree indx_before_incr, indx_after_incr;
gimple incr;
tree step;
+ alias_set_type ptr_alias_set = 0;
/* Check the step (evolution) of the load in LOOP, and record
whether it's invariant. */
@@ -1082,7 +1092,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
&& TYPE_RESTRICT (TREE_TYPE (DR_BASE_ADDRESS (dr))))
{
get_alias_set (base_name);
- DECL_POINTER_ALIAS_SET (vect_ptr)
+ DECL_POINTER_ALIAS_SET (vect_ptr) = ptr_alias_set
= DECL_POINTER_ALIAS_SET (SSA_NAME_VAR (DR_BASE_ADDRESS (dr)));
}
@@ -1141,7 +1151,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
/* Create: (&(base[init_val+offset]) in the loop preheader. */
new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
- offset, loop);
+ offset, loop, ptr_alias_set);
pe = loop_preheader_edge (loop);
if (new_stmt_list)
{
@@ -5659,7 +5669,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
{
/* Generate the INIT_ADDR computation outside LOOP. */
init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
- NULL_TREE, loop);
+ NULL_TREE, loop, 0);
pe = loop_preheader_edge (loop);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
@@ -7621,7 +7631,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
gimple_seq new_stmts = NULL;
tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
- &new_stmts, NULL_TREE, loop);
+ &new_stmts, NULL_TREE, loop, 0);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
@@ -7859,7 +7869,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
/* create: addr_tmp = (int)(address_of_first_vector) */
addr_base =
vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
- NULL_TREE, loop);
+ NULL_TREE, loop, 0);
if (new_stmt_list != NULL)
gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
@@ -8024,10 +8034,10 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
addr_base_a =
vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
- NULL_TREE, loop);
+ NULL_TREE, loop, 0);
addr_base_b =
vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
- NULL_TREE, loop);
+ NULL_TREE, loop, 0);
segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
@@ -8328,6 +8338,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
bool strided_store;
bool slp_scheduled = false;
unsigned int nunits;
+ bool arch_change = loop->target_arch != cfun->target_arch;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
@@ -8343,7 +8354,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ if (!arch_change && LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
vect_do_peeling_for_alignment (loop_vinfo);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
@@ -8369,6 +8380,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
split_edge (loop_preheader_edge (loop));
+ targetm_pnt = targetm_array[loop->target_arch];
+
/* FORNOW: the vectorizer supports only loops which body consist
of one basic block (header + empty latch). When the vectorizer will
support more involved loop forms, the order by which the BBs are
@@ -8517,6 +8530,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
until all the loops have been transformed? */
update_ssa (TODO_update_ssa);
+ targetm_pnt = targetm_array[cfun->target_arch];
+
if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
fprintf (vect_dump, "LOOP VECTORIZED.");
if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index a455896a6f1..d3791690f3d 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -520,7 +520,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
update_phi = gsi_stmt (gsi_update);
/* Virtual phi; Mark it for renaming. We actually want to call
- mar_sym_for_renaming, but since all ssa renaming datastructures
+ mark_sym_for_renaming, but since all ssa renaming datastructures
are going to be freed before we get to call ssa_update, we just
record this name for now in a bitmap, and will mark it for
renaming later. */
@@ -2788,12 +2788,9 @@ vectorize_loops (void)
destroy_loop_vec_info (loop_vinfo, true);
loop_vinfo = 0;
}
- if (best_arch == (int) cfun->target_arch)
- {
- targetm_pnt = targetm_array[best_arch];
- loop_vinfo = vect_analyze_loop (loop);
- target_arch = best_arch;
- }
+ targetm_pnt = targetm_array[best_arch];
+ loop_vinfo = vect_analyze_loop (loop);
+ target_arch = best_arch;
}
targetm_pnt = targetm_array[cfun->target_arch];
loop->aux = loop_vinfo;
@@ -2803,12 +2800,30 @@ vectorize_loops (void)
if (best_arch != (int) cfun->target_arch)
{
- /* This loop should be vectorized for another target. Since we
- might to have more than one thread on this other target, but
- do the reduction on the main processor, leave this to
- parallelize_loops. */
+ /* This loop should be vectorized for another target.
+ We do the vectorization now because, if required, alias checks
+ and a loop version for the aliased case should run on the main
+ target (saving code space on the extra target).
+ Likewise, peeling to obtain the vectorization factor
+ (vect_do_peeling_for_loop_bound) should be done for the main
+ target. ??? We might want to extend this peeling to do
+ a bit of looping to work concurrently with the extra target.
+ ??? This is good for arc-mxp or ppc-spu, but h8300-sh64 would
+ be better off (at least if power is no object once we activate
+ the sh64) doing more work on the sh64.
+ Alignment checks will not be necessary because alignment
+ mismatch is taken care of during data transfer.
+ (Might need to modify this aspect if the DMA mechanism for
+ some target architecture pair has alignment restrictions).
+ Since we might want to have more than one thread on this other
+ target, but do the reduction on the main processor, we leave
+ the outlining to parallelize_loops.
+ As parallelize_loops will see the vectorized loop, there should
+ be no trouble with a thread other than on the main target
+ getting vector subunits not making up a full vector.
+ An additional task that the vectorization will have to do now
+ is to translate pointers to use the appropriate ptr_mode. */
loop->target_arch = best_arch;
- continue;
}
vect_transform_loop (loop_vinfo);
num_vectorized_loops++;