aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZdenek Dvorak <dvorakz@suse.cz>2006-10-24 22:14:34 +0000
committerZdenek Dvorak <dvorakz@suse.cz>2006-10-24 22:14:34 +0000
commit3e3045068c3e2a33cc268ec8db120ff0a326c0df (patch)
treec80208991820bbaa2447e9c38aacc692b11cc9e6
parent0e2b1943948bb72433477f6d3aed8cdc485ccc4c (diff)
* tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new block.parloop
* tree.c (build6_stat): New function. * tree.h (build6_stat): Declare. (build6): New macro. * omp-low.c: Include cfgloop.h. (mark_call_virtual_operands): Moved from tree-parloop.c. (expand_omp_parallel, expand_omp_for_static_nochunk): Update loops. (build_omp_regions_1): Enable finding the regions locally. (build_omp_regions_root, omp_expand_local): New functions. (build_omp_regions): Pass new argument to build_omp_regions_1. * tree-parloops.c (loop_parallel_p): Require that there are no loop exit phi nodes, and that the loop can be duplicated. (mark_virtual_ops_for_renaming_list): Removed. (mark_call_virtual_operands): Moved to omp-low.c. (separate_decls_in_loop_name): Handle the case of non-SSA name argument. (separate_decls_in_loop): Do not return name_copies hashtable. (shift_ivs_for_iteration, record_steps, remap_ssa_names, record_and_update_steps, change_exit_condition, extract_loop_to_function, call_in_parallel, shift_ivs_for_remaining_iterations, finalize_threads): Removed. (canonicalize_loop_ivs, transform_to_exit_first_loop, create_parallel_loop): New functions. (gen_parallel_loop): Use OMP tree codes. * tree-flow.h (omp_expand_local, tree_duplicate_sese_tail): Declare. (mark_call_virtual_operands, add_phi_args_after_copy): Declaration removed. (split_loop_exit_edge): Declaration changed. * Makefile.in (omp-low.o): Add CFGLOOP_H dependency. * tree-cfg.c (add_phi_args_after_copy_edge): New function. (add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge. (add_phi_args_after_copy): Made static. Also copy arguments on one additional edge. (tree_duplicate_sese_region): Fix updating of frequencies. Add argument to add_phi_args_after_copy call. (tree_duplicate_sese_tail): New function. (move_sese_region_to_fn): Update loops. * testsuite/gcc.dg/tree-ssa/parallelization-1.c: Update outcome. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/parloop@118013 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.parloop42
-rw-r--r--gcc/Makefile.in2
-rw-r--r--gcc/omp-low.c74
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c2
-rw-r--r--gcc/tree-cfg.c250
-rw-r--r--gcc/tree-flow.h9
-rw-r--r--gcc/tree-parloops.c820
-rw-r--r--gcc/tree-ssa-loop-manip.c6
-rw-r--r--gcc/tree.c29
-rw-r--r--gcc/tree.h3
10 files changed, 681 insertions, 556 deletions
diff --git a/gcc/ChangeLog.parloop b/gcc/ChangeLog.parloop
index 45a13b36027..a806774d19c 100644
--- a/gcc/ChangeLog.parloop
+++ b/gcc/ChangeLog.parloop
@@ -1,3 +1,45 @@
+2006-10-25 Zdenek Dvorak <dvorakz@suse.cz>
+
+ * tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new block.
+ * tree.c (build6_stat): New function.
+ * tree.h (build6_stat): Declare.
+ (build6): New macro.
+ * omp-low.c: Include cfgloop.h.
+ (mark_call_virtual_operands): Moved from tree-parloop.c.
+ (expand_omp_parallel, expand_omp_for_static_nochunk): Update loops.
+ (build_omp_regions_1): Enable finding the regions locally.
+ (build_omp_regions_root, omp_expand_local): New functions.
+ (build_omp_regions): Pass new argument to build_omp_regions_1.
+ * tree-parloops.c (loop_parallel_p): Require that there are no
+ loop exit phi nodes, and that the loop can be duplicated.
+ (mark_virtual_ops_for_renaming_list): Removed.
+ (mark_call_virtual_operands): Moved to omp-low.c.
+ (separate_decls_in_loop_name): Handle the case of non-SSA name
+ argument.
+ (separate_decls_in_loop): Do not return name_copies hashtable.
+ (shift_ivs_for_iteration, record_steps, remap_ssa_names,
+ record_and_update_steps, change_exit_condition,
+ extract_loop_to_function, call_in_parallel,
+ shift_ivs_for_remaining_iterations, finalize_threads): Removed.
+ (canonicalize_loop_ivs, transform_to_exit_first_loop,
+ create_parallel_loop): New functions.
+ (gen_parallel_loop): Use OMP tree codes.
+ * tree-flow.h (omp_expand_local, tree_duplicate_sese_tail): Declare.
+ (mark_call_virtual_operands, add_phi_args_after_copy): Declaration
+ removed.
+ (split_loop_exit_edge): Declaration changed.
+ * Makefile.in (omp-low.o): Add CFGLOOP_H dependency.
+ * tree-cfg.c (add_phi_args_after_copy_edge): New function.
+ (add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge.
+ (add_phi_args_after_copy): Made static. Also copy arguments on
+ one additional edge.
+ (tree_duplicate_sese_region): Fix updating of frequencies.
+ Add argument to add_phi_args_after_copy call.
+ (tree_duplicate_sese_tail): New function.
+ (move_sese_region_to_fn): Update loops.
+
+ * testsuite/gcc.dg/tree-ssa/parallelization-1.c: Update outcome.
+
2006-10-16 Zdenek Dvorak <dvorakz@suse.cz>
* tree-pretty-print.c (dump_generic_node): Handle OMP_SECTIONS_SWITCH
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 97d5abd8664..afa9f68975a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2065,7 +2065,7 @@ gimple-low.o : gimple-low.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \
omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(RTL_H) $(TREE_GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TIMEVAR_H) $(FLAGS_H) $(EXPR_H) toplev.h tree-pass.h \
- $(GGC_H)
+ $(GGC_H) $(CFGLOOP_H)
tree-browser.o : tree-browser.c tree-browser.def $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TREE_INLINE_H) $(DIAGNOSTIC_H) $(HASHTAB_H) \
$(TM_H) coretypes.h
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6dde9eacc89..c3bf19e325a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -41,7 +41,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "tree-pass.h"
#include "ggc.h"
#include "except.h"
-
+#include "cfgloop.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements
@@ -2573,6 +2573,8 @@ expand_omp_parallel (struct omp_region *region)
gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
bsi_remove (&si, true);
e = split_block (entry_bb, t);
+ if (current_loops)
+ add_bb_to_loop (e->dest, e->src->loop_father);
entry_bb = e->dest;
single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
@@ -2606,6 +2608,26 @@ expand_omp_parallel (struct omp_region *region)
update_ssa (TODO_update_ssa_only_virtuals);
}
+/* Marks operands of calls for renaming. */
+
+static void
+mark_call_virtual_operands (void)
+{
+ basic_block bb;
+ block_stmt_iterator bsi;
+ tree stmt;
+
+ FOR_EACH_BB (bb)
+ {
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ stmt = bsi_stmt (bsi);
+ if (get_call_expr_in (stmt) != NULL_TREE)
+ mark_new_vars_to_rename (stmt);
+ }
+ }
+}
+
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with any schedule. Given parameters:
@@ -2860,7 +2882,12 @@ expand_omp_for_static_nochunk (struct omp_region *region,
cont_bb = region->cont;
gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
- seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
+
+ if (current_loops)
+ seq_start_bb = loop_split_edge_with (FALLTHRU_EDGE (entry_bb), NULL);
+ else
+ seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
+
body_bb = single_succ (seq_start_bb);
gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
@@ -3598,10 +3625,13 @@ expand_omp (struct omp_region *region)
/* Helper for build_omp_regions. Scan the dominator tree starting at
- block BB. PARENT is the region that contains BB. */
+ block BB. PARENT is the region that contains BB. If SINGLE_TREE is
+ true, the function ends once a single tree is built (otherwise, whole
+ forest of OMP constructs may be built). */
static void
-build_omp_regions_1 (basic_block bb, struct omp_region *parent)
+build_omp_regions_1 (basic_block bb, struct omp_region *parent,
+ bool single_tree)
{
block_stmt_iterator si;
tree stmt;
@@ -3650,12 +3680,44 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent)
}
}
+ if (single_tree && !parent)
+ return;
+
for (son = first_dom_son (CDI_DOMINATORS, bb);
son;
son = next_dom_son (CDI_DOMINATORS, son))
- build_omp_regions_1 (son, parent);
+ build_omp_regions_1 (son, parent, single_tree);
}
+/* Builds the tree of OMP regions rooted at ROOT, storing it to
+ root_omp_region. */
+
+static void
+build_omp_regions_root (basic_block root)
+{
+ gcc_assert (root_omp_region == NULL);
+ build_omp_regions_1 (root, NULL, true);
+ gcc_assert (root_omp_region != NULL);
+}
+
+/* Expands piece of parallel code starting in HEAD. */
+
+void
+omp_expand_local (basic_block head)
+{
+ build_omp_regions_root (head);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nOMP region tree\n\n");
+ dump_omp_region (dump_file, root_omp_region, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ remove_exit_barriers (root_omp_region);
+ expand_omp (root_omp_region);
+
+ free_omp_regions ();
+}
/* Scan the CFG and build a tree of OMP regions. Return the root of
the OMP region tree. */
@@ -3665,7 +3727,7 @@ build_omp_regions (void)
{
gcc_assert (root_omp_region == NULL);
calculate_dominance_info (CDI_DOMINATORS);
- build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
+ build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c b/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c
index 493f34881bb..ee62ff666b8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c
@@ -28,6 +28,6 @@ int main(void)
/* Check that the first loop in parloop got parallelized. */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
-/* { dg-final { scan-tree-dump-times "loopfn" 4 "final_cleanup" } } */
+/* { dg-final { scan-tree-dump-times "loopfn" 5 "final_cleanup" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 0ac813ae725..105d5d4ce03 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4342,6 +4342,51 @@ tree_duplicate_bb (basic_block bb)
return new_bb;
}
+/* Adds phi node arguments for edge E_COPY after basic block duplication. */
+
+static void
+add_phi_args_after_copy_edge (edge e_copy)
+{
+ basic_block bb, bb_copy = e_copy->src, dest;
+ edge e;
+ edge_iterator ei;
+ tree phi, phi_copy, phi_next, def;
+
+ if (!phi_nodes (e_copy->dest))
+ return;
+
+ bb = bb_copy->flags & BB_DUPLICATED ? get_bb_original (bb_copy) : bb_copy;
+
+ if (e_copy->dest->flags & BB_DUPLICATED)
+ dest = get_bb_original (e_copy->dest);
+ else
+ dest = e_copy->dest;
+
+ e = find_edge (bb, dest);
+ if (!e)
+ {
+ /* During loop unrolling the target of the latch edge is copied.
+ In this case we are not looking for edge to dest, but to
+ duplicated block whose original was dest. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if ((e->dest->flags & BB_DUPLICATED)
+ && get_bb_original (e->dest) == dest)
+ break;
+ }
+
+ gcc_assert (e != NULL);
+ }
+
+ for (phi = phi_nodes (e->dest), phi_copy = phi_nodes (e_copy->dest);
+ phi;
+ phi = phi_next, phi_copy = PHI_CHAIN (phi_copy))
+ {
+ phi_next = PHI_CHAIN (phi);
+ def = PHI_ARG_DEF_FROM_EDGE (phi, e);
+ add_phi_arg (phi_copy, def, e_copy);
+ }
+}
/* Basic block BB_COPY was created by code duplication. Add phi node
arguments for edges going out of BB_COPY. The blocks that were
@@ -4350,54 +4395,23 @@ tree_duplicate_bb (basic_block bb)
void
add_phi_args_after_copy_bb (basic_block bb_copy)
{
- basic_block bb, dest;
- edge e, e_copy;
edge_iterator ei;
- tree phi, phi_copy, phi_next, def;
-
- bb = get_bb_original (bb_copy);
+ edge e_copy;
FOR_EACH_EDGE (e_copy, ei, bb_copy->succs)
{
- if (!phi_nodes (e_copy->dest))
- continue;
-
- if (e_copy->dest->flags & BB_DUPLICATED)
- dest = get_bb_original (e_copy->dest);
- else
- dest = e_copy->dest;
-
- e = find_edge (bb, dest);
- if (!e)
- {
- /* During loop unrolling the target of the latch edge is copied.
- In this case we are not looking for edge to dest, but to
- duplicated block whose original was dest. */
- FOR_EACH_EDGE (e, ei, bb->succs)
- if ((e->dest->flags & BB_DUPLICATED)
- && get_bb_original (e->dest) == dest)
- break;
-
- gcc_assert (e != NULL);
- }
-
- for (phi = phi_nodes (e->dest), phi_copy = phi_nodes (e_copy->dest);
- phi;
- phi = phi_next, phi_copy = PHI_CHAIN (phi_copy))
- {
- phi_next = PHI_CHAIN (phi);
- def = PHI_ARG_DEF_FROM_EDGE (phi, e);
- add_phi_arg (phi_copy, def, e_copy);
- }
+ add_phi_args_after_copy_edge (e_copy);
}
}
/* Blocks in REGION_COPY array of length N_REGION were created by
duplication of basic blocks. Add phi node arguments for edges
- going from these blocks. */
+ going from these blocks. If E_COPY is not NULL, also add
+ phi node arguments for its destination. */
-void
-add_phi_args_after_copy (basic_block *region_copy, unsigned n_region)
+static void
+add_phi_args_after_copy (basic_block *region_copy, unsigned n_region,
+ edge e_copy)
{
unsigned i;
@@ -4406,6 +4420,8 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region)
for (i = 0; i < n_region; i++)
add_phi_args_after_copy_bb (region_copy[i]);
+ if (e_copy)
+ add_phi_args_after_copy_edge (e_copy);
for (i = 0; i < n_region; i++)
region_copy[i]->flags &= ~BB_DUPLICATED;
@@ -4504,7 +4520,7 @@ tree_duplicate_sese_region (edge entry, edge exit,
frequencies. */
if (total_freq == 0)
total_freq = 1;
- else if (entry_freq > total_freq)
+ if (entry_freq > total_freq)
entry_freq = total_freq;
}
@@ -4546,7 +4562,154 @@ tree_duplicate_sese_region (edge entry, edge exit,
free (doms);
/* Add the other PHI node arguments. */
- add_phi_args_after_copy (region_copy, n_region);
+ add_phi_args_after_copy (region_copy, n_region, NULL);
+
+ /* Update the SSA web. */
+ update_ssa (TODO_update_ssa);
+
+ if (free_region_copy)
+ free (region_copy);
+
+ free_original_copy_tables ();
+ return true;
+}
+
+/* Duplicates REGION consisting of N_REGION blocks. The new blocks
+ are stored to REGION_COPY in the same order in that they appear
+ in REGION, if REGION_COPY is not NULL. ENTRY is the entry to
+ the region, EXIT an exit from it. The condition guarding EXIT
+ is moved to ENTRY. Returns true if duplication succeeds, false
+ otherwise. */
+
+bool
+tree_duplicate_sese_tail (edge entry, edge exit,
+ basic_block *region, unsigned n_region,
+ basic_block *region_copy)
+{
+ unsigned i, n_doms;
+ bool free_region_copy = false;
+ struct loop *loop = exit->dest->loop_father;
+ struct loop *orig_loop = entry->dest->loop_father;
+ basic_block *doms, switch_bb, entry_bb, nentry_bb;
+ int total_freq = 0, exit_freq = 0;
+ gcov_type total_count = 0, exit_count = 0;
+ edge exits[2], nexits[2], e;
+ block_stmt_iterator bsi;
+ tree cond;
+ edge sorig, snew, single_exit = orig_loop->single_exit;
+
+ gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
+ exits[0] = exit;
+ exits[1] = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
+
+ if (!can_copy_bbs_p (region, n_region))
+ return false;
+
+ /* Some sanity checking. Note that we do not check for all possible
+ missuses of the functions. I.e. if you ask to copy something weird,
+ it will work, but the state of structures probably will not be
+ correct. */
+ for (i = 0; i < n_region; i++)
+ {
+ /* We do not handle subloops, i.e. all the blocks must belong to the
+ same loop. */
+ if (region[i]->loop_father != orig_loop)
+ return false;
+
+ if (region[i] == orig_loop->latch)
+ return false;
+ }
+
+ orig_loop->copy = loop;
+
+ if (!region_copy)
+ {
+ region_copy = XNEWVEC (basic_block, n_region);
+ free_region_copy = true;
+ }
+
+ gcc_assert (!need_ssa_update_p ());
+
+ /* Record blocks outside the region that are dominated by something
+ inside. */
+ doms = XNEWVEC (basic_block, n_basic_blocks);
+ initialize_original_copy_tables ();
+
+ n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms);
+
+ if (exit->src->count)
+ {
+ total_count = exit->src->count;
+ exit_count = exit->count;
+ /* Fix up corner cases, to avoid division by zero or creation of negative
+ frequencies. */
+ if (exit_count > total_count)
+ exit_count = total_count;
+ }
+ else
+ {
+ total_freq = exit->src->frequency;
+ exit_freq = EDGE_FREQUENCY (exit);
+ /* Fix up corner cases, to avoid division by zero or creation of negative
+ frequencies. */
+ if (total_freq == 0)
+ total_freq = 1;
+ if (exit_freq > total_freq)
+ exit_freq = total_freq;
+ }
+
+ copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop,
+ split_edge_bb_loc (exit));
+ if (total_count)
+ {
+ scale_bbs_frequencies_gcov_type (region, n_region,
+ total_count - exit_count,
+ total_count);
+ scale_bbs_frequencies_gcov_type (region_copy, n_region, exit_count,
+ total_count);
+ }
+ else
+ {
+ scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq,
+ total_freq);
+ scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq);
+ }
+
+ /* Create the switch block, and put the exit condition to it. */
+ entry_bb = entry->dest;
+ nentry_bb = get_bb_copy (entry_bb);
+ if (!last_stmt (entry->src)
+ || !stmt_ends_bb_p (last_stmt (entry->src)))
+ switch_bb = entry->src;
+ else
+ switch_bb = loop_split_edge_with (entry, NULL);
+ set_immediate_dominator (CDI_DOMINATORS, nentry_bb, switch_bb);
+
+ bsi = bsi_last (switch_bb);
+ cond = last_stmt (exit->src);
+ gcc_assert (TREE_CODE (cond) == COND_EXPR);
+ bsi_insert_after (&bsi, unshare_expr (cond), BSI_NEW_STMT);
+
+ sorig = single_succ_edge (switch_bb);
+ sorig->flags = exits[1]->flags;
+ snew = make_edge (switch_bb, nentry_bb, exits[0]->flags);
+ if (exits[0] == single_exit)
+ orig_loop->single_exit = snew;
+
+ /* Add the PHI node arguments. */
+ add_phi_args_after_copy (region_copy, n_region, snew);
+
+ /* Get rid of now superfluous conditions and associated edges (and phi node
+ arguments). */
+ e = redirect_edge_and_branch (exits[0], exits[1]->dest);
+ PENDING_STMT (e) = NULL_TREE;
+ e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
+ PENDING_STMT (e) = NULL_TREE;
+
+ /* Anything that is outside of the region, but was dominated by something
+ inside needs to update dominance info. */
+ iterate_fix_dominators (CDI_DOMINATORS, doms, n_doms);
+ free (doms);
/* Update the SSA web. */
update_ssa (TODO_update_ssa);
@@ -4798,8 +4961,10 @@ move_block_to_fn (struct function *dest_cfun, basic_block bb,
basic_block *addr;
tree phi;
- /* Remove BB from dominance structures. */
+ /* Remove BB from dominance and loop structures. */
delete_from_dominance_info (CDI_DOMINATORS, bb);
+ if (current_loops)
+ remove_bb_from_loops (bb);
/* Link BB to the new linked list. */
move_block_after (bb, after);
@@ -5001,6 +5166,7 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
edge e;
edge_iterator ei;
htab_t new_label_map, vars_map;
+ struct loop *loop = entry_bb->loop_father;
saved_cfun = cfun;
@@ -5134,6 +5300,8 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
/* Back in the original function, the SESE region has disappeared,
create a new basic block in its place. */
bb = create_empty_bb (entry_pred[0]);
+ if (current_loops)
+ add_bb_to_loop (bb, loop);
for (i = 0; i < num_entry_edges; i++)
{
e = make_edge (entry_pred[i], bb, entry_flag[i]);
diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
index b24f3b47521..a774ef78552 100644
--- a/gcc/tree-flow.h
+++ b/gcc/tree-flow.h
@@ -534,6 +534,8 @@ extern struct omp_region *root_omp_region;
extern struct omp_region *new_omp_region (basic_block, enum tree_code,
struct omp_region *);
extern void free_omp_regions (void);
+void omp_expand_local (basic_block);
+
tree copy_var_decl (tree, tree, tree);
tree find_omp_clause (tree, enum tree_code);
@@ -582,8 +584,9 @@ extern tree tree_block_label (basic_block);
extern void extract_true_false_edges_from_block (basic_block, edge *, edge *);
extern bool tree_duplicate_sese_region (edge, edge, basic_block *, unsigned,
basic_block *);
+extern bool tree_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
+ basic_block *);
extern void add_phi_args_after_copy_bb (basic_block);
-extern void add_phi_args_after_copy (basic_block *, unsigned);
extern bool tree_purge_dead_eh_edges (basic_block);
extern bool tree_purge_all_dead_eh_edges (bitmap);
extern tree gimplify_val (block_stmt_iterator *, tree, tree);
@@ -626,7 +629,6 @@ extern void add_referenced_var (tree);
extern void mark_new_vars_to_rename (tree);
extern void find_new_referenced_vars (tree *);
void mark_virtual_ops_for_renaming (tree);
-void mark_call_virtual_operands (void);
extern tree make_rename_temp (tree, const char *);
extern void set_default_def (tree, tree);
@@ -819,7 +821,7 @@ void loop_commit_inserts (void);
bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *);
void create_iv (tree, tree, tree, struct loop *, block_stmt_iterator *, bool,
tree *, tree *);
-void split_loop_exit_edge (edge);
+basic_block split_loop_exit_edge (edge);
unsigned force_expr_to_var_cost (tree);
basic_block bsi_insert_on_edge_immediate_loop (edge, tree);
void standard_iv_increment_position (struct loop *, block_stmt_iterator *,
@@ -1016,4 +1018,5 @@ void swap_tree_operands (tree, tree *, tree *);
extern void recalculate_used_alone (void);
extern bool updating_used_alone;
+
#endif /* _TREE_FLOW_H */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 36ec003a67b..acf0881adab 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -121,8 +121,7 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter)
{
tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
- if (is_gimple_reg (val)
- && !expr_invariant_in_loop_p (loop, val))
+ if (is_gimple_reg (val))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " FAILED: value used outside loop\n");
@@ -146,11 +145,18 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter)
}
}
- datarefs = VEC_alloc (data_reference_p, heap, 10);
- dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+ /* We need to version the loop to verify assumptions in runtime. */
+ if (!can_duplicate_loop_p (loop))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " FAILED: cannot be duplicated\n");
+ return false;
+ }
/* Check for problems with dependences. If the loop can be reversed,
the iterations are indepent. */
+ datarefs = VEC_alloc (data_reference_p, heap, 10);
+ dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
compute_data_dependences_for_loop (loop, true, &datarefs,
&dependence_relations);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -174,37 +180,6 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter)
return ret;
}
-/* Calls mark_virtual_ops_for_renaming for all members of LIST. */
-
-static void
-mark_virtual_ops_for_renaming_list (tree list)
-{
- tree_stmt_iterator tsi;
-
- for (tsi = tsi_start (list); !tsi_end_p (tsi); tsi_next (&tsi))
- mark_virtual_ops_for_renaming (tsi_stmt (tsi));
-}
-
-/* Marks operands of calls for renaming. */
-
-void
-mark_call_virtual_operands (void)
-{
- basic_block bb;
- block_stmt_iterator bsi;
- tree stmt;
-
- FOR_EACH_BB (bb)
- {
- for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
- {
- stmt = bsi_stmt (bsi);
- if (get_call_expr_in (stmt) != NULL_TREE)
- mark_new_vars_to_rename (stmt);
- }
- }
-}
-
/* Assigns the address of VAR in TYPE to an ssa name, and returns this name.
The assignment statement is placed before LOOP. DECL_ADDRESS maps decls
to their addresses that can be reused. */
@@ -377,6 +352,9 @@ separate_decls_in_loop_name (tree name,
struct name_to_copy_elt elt, *nelt;
void **slot, **dslot;
+ if (TREE_CODE (name) != SSA_NAME)
+ return name;
+
idx = SSA_NAME_VERSION (name);
elt.version = idx;
slot = htab_find_slot_with_hash (name_copies, &elt, idx,
@@ -559,16 +537,16 @@ bb1:
`old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
pointer `new' is intentionally not initialized (the loop will be split to a
separate function later, and `new' will be initialized from its arguments).
- *PER_THREAD is updated to the ssa name to that the value is loaded in
- bb1. A mapping of old to new copies is stored to *NAME_COPIES. */
+ */
static void
-separate_decls_in_loop (struct loop *loop, tree *per_thread,
- tree *arg_struct, tree *new_arg_struct,
- htab_t *name_copies)
+separate_decls_in_loop (struct loop *loop, tree *arg_struct,
+ tree *new_arg_struct)
{
basic_block bb1 = loop_split_edge_with (loop_preheader_edge (loop), NULL);
basic_block bb0 = single_pred (bb1);
+ htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
+ name_to_copy_elt_eq, free);
htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
free);
basic_block bb, *body = get_loop_body (loop);
@@ -577,29 +555,21 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread,
block_stmt_iterator bsi;
struct clsn_data clsn_data;
- *name_copies = htab_create (10, name_to_copy_elt_hash,
- name_to_copy_elt_eq, free);
-
/* Find and rename the ssa names defined outside of loop. */
for (i = 0; i < loop->num_nodes; i++)
{
bb = body[i];
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- separate_decls_in_loop_stmt (loop, phi, *name_copies, decl_copies);
+ separate_decls_in_loop_stmt (loop, phi, name_copies, decl_copies);
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
- separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), *name_copies,
+ separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), name_copies,
decl_copies);
}
-
- if (TREE_CODE (*per_thread) == SSA_NAME)
- *per_thread
- = separate_decls_in_loop_name (*per_thread, *name_copies, decl_copies,
- true);
free (body);
- if (htab_elements (*name_copies) == 0)
+ if (htab_elements (name_copies) == 0)
{
/* It may happen that there is nothing to copy (if there are only
loop carried and external variables in the loop). */
@@ -614,14 +584,14 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread,
type);
TYPE_NAME (type) = type_name;
- htab_traverse (*name_copies, add_field_for_name, type);
+ htab_traverse (name_copies, add_field_for_name, type);
layout_type (type);
/* Create the loads and stores. */
*arg_struct = create_tmp_var (type, ".paral_data_store");
- add_referenced_var(*arg_struct);
+ add_referenced_var (*arg_struct);
nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
- add_referenced_var(nvar);
+ add_referenced_var (nvar);
*new_arg_struct = make_ssa_name (nvar, NULL_TREE);
/* We should mark *arg_struct call clobbered. However, this means
@@ -633,209 +603,12 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread,
clsn_data.load = *new_arg_struct;
clsn_data.store_bb = bb0;
clsn_data.load_bb = bb1;
- htab_traverse (*name_copies, create_loads_and_stores_for_name,
+ htab_traverse (name_copies, create_loads_and_stores_for_name,
&clsn_data);
}
htab_delete (decl_copies);
-}
-
-/* Replaces initial values of induction variables in LOOP to the start of the
- PER_THREAD * (thread number - 1)-th iteration. STEPS is the hashtable that
- contains steps of each induction variable. */
-
-static void
-shift_ivs_for_iteration (struct loop *loop, tree per_thread, htab_t steps)
-{
- tree phi, name, step, ncopy, stmts, call, cdecl, type, init, delta, ninit;
- edge entry = loop_preheader_edge (loop);
- struct int_tree_map tem, *elt;
- unsigned ver;
- block_stmt_iterator bsi = bsi_last (entry->src);
- use_operand_p init_p;
-
- for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
- {
- name = PHI_RESULT (phi);
- if (!is_gimple_reg (name))
- continue;
-
- ver = SSA_NAME_VERSION (name);
- tem.uid = ver;
- elt = htab_find_with_hash (steps, &tem, ver);
- step = elt->to;
-
- init_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, entry);
- init = USE_FROM_PTR (init_p);
- type = TREE_TYPE (init);
- cdecl = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
- call = fold_convert (type, build_function_call_expr (cdecl, NULL));
- ncopy = build2 (MINUS_EXPR, type, call, build_int_cst (type, 1));
- delta = fold_build2 (MULT_EXPR, type, unshare_expr (step), per_thread);
- delta = fold_build2 (MULT_EXPR, type, delta, ncopy);
- ninit = fold_build2 (PLUS_EXPR, type, init, delta);
-
- ninit = force_gimple_operand (ninit, &stmts, true, NULL_TREE);
- if (stmts)
- bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
-
- SET_USE (init_p, ninit);
- }
-}
-
-/* Records steps of induction variables of LOOP to STEPS. */
-
-static void
-record_steps (struct loop *loop, htab_t *steps)
-{
- tree phi, name;
- unsigned ver;
- affine_iv iv;
- bool ok;
- void **slot;
- struct int_tree_map tem, *elt;
-
- *steps = htab_create (10, int_tree_map_hash, int_tree_map_eq, free);
- for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
- {
- name = PHI_RESULT (phi);
- if (!is_gimple_reg (name))
- continue;
-
- ver = SSA_NAME_VERSION (name);
- ok = simple_iv (loop, phi, name, &iv, true);
- gcc_assert (ok);
-
- tem.uid = ver;
- slot = htab_find_slot_with_hash (*steps, &tem, ver, INSERT);
- elt = XNEW (struct int_tree_map);
- elt->uid = ver;
- elt->to = iv.step;
- *slot = elt;
- }
-}
-
-/* Remaps ssa names in EXPR according to NAME_COPIES, and returns the updated
- expression. */
-
-static tree
-remap_ssa_names (tree expr, htab_t name_copies)
-{
- struct name_to_copy_elt tem, *elt;
- unsigned ver;
- tree copied;
- unsigned i, n;
- tree nop;
-
- if (TREE_CODE (expr) == SSA_NAME)
- {
- ver = SSA_NAME_VERSION (expr);
- tem.version = ver;
- elt = htab_find_with_hash (name_copies, &tem, ver);
-
- return elt->new_name;
- }
-
- if (!EXPR_P (expr))
- return unshare_expr (expr);
-
- copied = copy_node (expr);
- n = TREE_CODE_LENGTH (TREE_CODE (expr));
-
- for (i = 0; i < n; i++)
- {
- nop = remap_ssa_names (TREE_OPERAND (expr, i), name_copies);
- TREE_OPERAND (copied, i) = nop;
- }
-
- return copied;
-}
-
-/* Records steps for induction variables of NLOOP in STEPS, and remap the steps
- of induction variables in LOOP according to NAME_COPIES. */
-
-static void
-record_and_update_steps (struct loop *loop, struct loop *nloop, htab_t steps,
- htab_t name_copies)
-{
- tree phi, nphi, name, nname;
- unsigned ver, nver;
- void **slot;
- struct int_tree_map tem, *elt, *nelt;
-
- /* The corresponding phi nodes in LOOP and in NLOOP should be in the same
- order. */
- for (phi = phi_nodes (loop->header), nphi = phi_nodes (nloop->header); phi;
- phi = PHI_CHAIN (phi), nphi = PHI_CHAIN (nphi))
- {
- name = PHI_RESULT (phi);
- nname = PHI_RESULT (nphi);
-
- if (!is_gimple_reg (name))
- {
- gcc_assert (!is_gimple_reg (nname));
- continue;
- }
- gcc_assert (is_gimple_reg (nname));
- ver = SSA_NAME_VERSION (name);
- nver = SSA_NAME_VERSION (nname);
-
- /* First copy the values from LOOP to NLOOP. */
- tem.uid = ver;
- elt = htab_find_with_hash (steps, &tem, ver);
- tem.uid = nver;
- slot = htab_find_slot_with_hash (steps, &tem, nver, INSERT);
- nelt = XNEW (struct int_tree_map);
- nelt->uid = nver;
- nelt->to = unshare_expr (elt->to);
- *slot = nelt;
-
- /* Then rewrite the old ones. */
- elt->to = remap_ssa_names (elt->to, name_copies);
- }
- gcc_assert (nphi == NULL_TREE);
-}
-
-/* Change the exit condition of LOOP so that it exits after PER_THREAD
- iterations, and remove the old exit. */
-
-static void
-change_exit_condition (struct loop *loop, tree per_thread)
-{
- basic_block ex_bb = loop->single_exit->src;
- basic_block ex_to = loop->single_exit->dest;
- block_stmt_iterator bsi = bsi_last (ex_bb);
- edge in_loop, e;
- tree exit_stmt = bsi_stmt (bsi);
- tree type = TREE_TYPE (per_thread);
- tree iv, cond;
-
- gcc_assert (TREE_CODE (exit_stmt) == COND_EXPR);
- bsi_remove (&bsi, true);
-
- in_loop = EDGE_SUCC (ex_bb, 0);
- if (in_loop == loop->single_exit)
- in_loop = EDGE_SUCC (ex_bb, 1);
-
- remove_edge (loop->single_exit);
- in_loop->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- in_loop->flags |= EDGE_FALLTHRU;
-
- e = single_pred_edge (loop_split_edge_with (loop_latch_edge (loop), NULL));
- bsi = bsi_last (e->src);
- create_iv (build_int_cst (type, 0), build_int_cst (type, 1), NULL_TREE,
- loop, &bsi, true, NULL, &iv);
- cond = build3 (COND_EXPR, void_type_node,
- fold_build2 (LT_EXPR, boolean_type_node, iv, per_thread),
- build1 (GOTO_EXPR, void_type_node,
- tree_block_label (e->dest)),
- build1 (GOTO_EXPR, void_type_node,
- tree_block_label (ex_to)));
- bsi_insert_after (&bsi, cond, BSI_NEW_STMT);
-
- e->flags &= ~EDGE_FALLTHRU;
- e->flags |= EDGE_TRUE_VALUE;
- loop->single_exit = make_edge (e->src, ex_to, EDGE_FALSE_VALUE);
+ htab_delete (name_copies);
}
/* Bitmap containing uids of functions created by parallelization. We cannot
@@ -910,182 +683,285 @@ create_loop_fn (void)
return decl;
}
-/* Extracts LOOP and its preheader to a separate function. This function is
- returned in LOOP_FN. ARG_STRUCT is initialized in the new function from
- an argument of the function. The single basic block that replaces LOOP is
- returned. */
+/* Bases all the induction variables in LOOP on a single induction variable
+ (unsigned with base 0 and step 1), whose final value is compared with
+ NIT. The induction variable is incremented in the loop latch. */
-static basic_block
-extract_loop_to_function (struct loop *loop, tree arg_struct, tree *loop_fn)
+static void
+canonicalize_loop_ivs (struct loop *loop, tree nit)
{
- basic_block bb_to = loop_split_edge_with (loop->single_exit, NULL);
- basic_block bb_from = loop_preheader_edge (loop)->src;
- basic_block repl_bb;
- tree arg, narg, stmt;
- struct function *act_cfun = cfun;
- tree act_decl = current_function_decl;
+ unsigned precision = TYPE_PRECISION (TREE_TYPE (nit));
+ tree phi, prev, res, type, var_before, val, atype, t, next;
block_stmt_iterator bsi;
- basic_block *body = get_loop_body (loop);
- struct loop *outer = loop->outer;
- unsigned i, n = loop->num_nodes;
+ bool ok;
+ affine_iv iv;
+ edge exit = single_dom_exit (loop);
- cancel_loop_tree (current_loops, loop);
- for (i = 0; i < n; i++)
- remove_bb_from_loops (body[i]);
- free (body);
- remove_bb_from_loops (bb_from);
- remove_bb_from_loops (bb_to);
+ for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
+ {
+ res = PHI_RESULT (phi);
- bsi = bsi_last (bb_to);
- bsi_insert_after (&bsi, build1 (RETURN_EXPR, void_type_node, NULL),
- BSI_NEW_STMT);
+ if (is_gimple_reg (res)
+ && TYPE_PRECISION (TREE_TYPE (res)) > precision)
+ precision = TYPE_PRECISION (TREE_TYPE (res));
+ }
- *loop_fn = create_loop_fn ();
- repl_bb = move_sese_region_to_fn (DECL_STRUCT_FUNCTION (*loop_fn),
- bb_from, bb_to);
- add_bb_to_loop (repl_bb, outer);
+ type = lang_hooks.types.type_for_size (precision, 1);
- cfun = DECL_STRUCT_FUNCTION (*loop_fn);
- current_function_decl = *loop_fn;
+ bsi = bsi_last (loop->latch);
+ create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
+ loop, &bsi, true, &var_before, NULL);
- /* Initialize the arg_struct. */
- if (arg_struct)
+ bsi = bsi_after_labels (loop->header);
+ prev = NULL;
+ for (phi = phi_nodes (loop->header); phi; phi = next)
{
- arg = DECL_ARGUMENTS (*loop_fn);
- add_referenced_var (arg);
- narg = make_ssa_name (arg, build_empty_stmt ());
- set_default_def (arg, narg);
-
- bsi = bsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
- stmt = build2 (MODIFY_EXPR, void_type_node, arg_struct,
- fold_convert (TREE_TYPE (arg_struct), narg));
- SSA_NAME_DEF_STMT (arg_struct) = stmt;
- bsi_insert_before (&bsi, stmt, BSI_NEW_STMT);
+ next = PHI_CHAIN (phi);
+ res = PHI_RESULT (phi);
+
+ if (!is_gimple_reg (res)
+ || res == var_before)
+ {
+ prev = phi;
+ continue;
+ }
+
+ ok = simple_iv (loop, phi, res, &iv, true);
+ gcc_assert (ok);
+
+ SET_PHI_RESULT (phi, NULL_TREE);
+ remove_phi_node (phi, prev);
+
+ atype = TREE_TYPE (res);
+ val = fold_build2 (PLUS_EXPR, atype,
+ unshare_expr (iv.base),
+ fold_build2 (MULT_EXPR, atype,
+ unshare_expr (iv.step),
+ fold_convert (atype, var_before)));
+ val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true,
+ BSI_SAME_STMT);
+ t = build2 (MODIFY_EXPR, void_type_node, res, val);
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ SSA_NAME_DEF_STMT (res) = t;
}
- cfun = act_cfun;
- current_function_decl = act_decl;
- go_out_of_ssa (*loop_fn);
+ t = last_stmt (exit->src);
+ /* Make the loop exit if the control condition is not satisfied. */
+ if (exit->flags & EDGE_TRUE_VALUE)
+ {
+ edge te, fe;
- return repl_bb;
+ extract_true_false_edges_from_block (exit->src, &te, &fe);
+ te->flags = EDGE_FALSE_VALUE;
+ fe->flags = EDGE_TRUE_VALUE;
+ }
+ COND_EXPR_COND (t) = build2 (LT_EXPR, boolean_type_node, var_before, nit);
}
-/* Emits the code to call LOOP_FN with argument ARG in N_THREADS threads
- after BSI. */
+/* Moves the exit condition of LOOP to the beginning of its header, and
+ duplicates the part of the last iteration that gets disabled to the
+ exit of the loop. NIT is the number of iterations of the loop
+ (used to initialize the variables in the duplicated part).
+
+ TODO: the common case is that latch of the loop is empty and immediatelly
+ follows the loop exit. In this case, it would be better not to copy the
+ body of the loop, but rather increase the number of iterations of the loop
+ by one. This may need some additional preconditioning in case NIT = ~0. */
static void
-call_in_parallel (block_stmt_iterator bsi, tree loop_fn, tree arg,
- unsigned n_threads)
+transform_to_exit_first_loop (struct loop *loop, tree nit)
{
- tree start_decl = built_in_decls[BUILT_IN_GOMP_PARALLEL_START];
- tree args, stmts, adata;
+ basic_block *bbs, *nbbs, ex_bb, orig_header;
+ unsigned n;
+ bool ok;
+ edge exit = single_dom_exit (loop), hpred;
+ tree phi, nphi, cond, control, control_name, res, t, cond_stmt;
+ block_stmt_iterator bsi;
+
+ split_block_after_labels (loop->header);
+ orig_header = single_succ (loop->header);
+ hpred = single_succ_edge (loop->header);
+ add_bb_to_loop (orig_header, loop);
- args = tree_cons (NULL, build_int_cst (unsigned_type_node, n_threads),
- NULL);
- if (arg)
+ cond_stmt = last_stmt (exit->src);
+ cond = COND_EXPR_COND (cond_stmt);
+ control = TREE_OPERAND (cond, 0);
+ gcc_assert (TREE_OPERAND (cond, 1) == nit);
+
+ /* Make sure that we have phi nodes on exit for all loop header phis
+ (create_parallel_loop requires that). */
+ for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
{
- adata = build_fold_addr_expr (arg);
- mark_call_clobbered (arg, ESCAPE_TO_CALL);
- }
- else
- adata = null_pointer_node;
- args = tree_cons (NULL, adata, args);
- args = tree_cons (NULL, build_fold_addr_expr (loop_fn), args);
- force_gimple_operand (build_function_call_expr (start_decl, args),
- &stmts, false, NULL_TREE);
- mark_virtual_ops_for_renaming_list (stmts);
- bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
-}
+ res = PHI_RESULT (phi);
+ t = make_ssa_name (SSA_NAME_VAR (res), phi);
+ SET_PHI_RESULT (phi, t);
-/* Makes the induction variables in LOOP start at
- (N_THREADS - 1) * PER_THREAD-th iteration when the LOOP is entered through
- NEW_ENTRY. STEPS describe the steps of induction variables in LOOP. */
+ nphi = create_phi_node (res, orig_header);
+ SSA_NAME_DEF_STMT (res) = nphi;
+ add_phi_arg (nphi, t, hpred);
-static void
-shift_ivs_for_remaining_iterations (struct loop *loop, tree per_thread,
- unsigned n_threads, edge new_entry,
- htab_t steps)
-{
- tree phi, name, nphi;
- unsigned ver;
- struct int_tree_map tem, *elt;
- basic_block bb = new_entry->dest;
- edge old_entry, entry;
- tree stmts, new_init, init, pass, type, delta;
- use_operand_p init_p;
-
- old_entry = EDGE_PRED (bb, 0);
- if (old_entry == new_entry)
- old_entry = EDGE_PRED (bb, 1);
- entry = loop_preheader_edge (loop);
+ if (res == control)
+ {
+ TREE_OPERAND (cond, 0) = t;
+ update_stmt (cond_stmt);
+ control = t;
+ }
+ }
- for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
+ bbs = get_loop_body_in_dom_order (loop);
+ for (n = 0; bbs[n] != exit->src; n++)
+ continue;
+ nbbs = XNEWVEC (basic_block, n);
+ ok = tree_duplicate_sese_tail (single_succ_edge (loop->header), exit,
+ bbs + 1, n, nbbs);
+ gcc_assert (ok);
+ free (bbs);
+ ex_bb = nbbs[0];
+ free (nbbs);
+
+ /* The only gimple reg that should be copied out of the loop is the
+ control variable. */
+ control_name = NULL_TREE;
+ for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
{
- name = PHI_RESULT (phi);
- if (!is_gimple_reg (name))
+ res = PHI_RESULT (phi);
+ if (!is_gimple_reg (res))
continue;
- ver = SSA_NAME_VERSION (name);
- tem.uid = ver;
- elt = htab_find_with_hash (steps, &tem, ver);
+ gcc_assert (control_name == NULL_TREE
+ && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
+ control_name = res;
+ }
+ gcc_assert (control_name != NULL_TREE);
+ phi = SSA_NAME_DEF_STMT (control_name);
+ SET_PHI_RESULT (phi, NULL_TREE);
+ remove_phi_node (phi, NULL_TREE);
+
+ /* Initialize the control variable to NIT. */
+ bsi = bsi_after_labels (ex_bb);
+ t = build2 (MODIFY_EXPR, void_type_node, control_name, nit);
+ bsi_insert_before (&bsi, t, BSI_NEW_STMT);
+ SSA_NAME_DEF_STMT (control_name) = t;
+}
- nphi = create_phi_node (SSA_NAME_VAR (name), bb);
- pass = PHI_RESULT (nphi);
+/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
+ LOOP_FN and DATA are the arguments of OMP_PARALLEL.
+ NEW_DATA is the variable that should be initialized from the argument
+ of LOOP_FN. N_THREADS is the requested number of threads. Returns the
+ basic block containing OMP_PARALLEL tree. */
- init_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, entry);
- init = USE_FROM_PTR (init_p);
- SET_USE (init_p, pass);
+static basic_block
+create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
+ tree new_data, unsigned n_threads)
+{
+ block_stmt_iterator bsi;
+ basic_block bb, paral_bb, for_bb, ex_bb;
+ tree t, param, res;
+ tree cvar, cvar_init, initvar, cvar_next, cvar_base, cond, phi, type;
+ edge exit, nexit, guard, end, e;
+
+ /* Prepare the OMP_PARALLEL statement. */
+ bb = loop_preheader_edge (loop)->src;
+ paral_bb = single_pred (bb);
+ bsi = bsi_last (paral_bb);
+
+ t = build_omp_clause (OMP_CLAUSE_NUM_THREADS);
+ OMP_CLAUSE_NUM_THREADS_EXPR (t)
+ = build_int_cst (integer_type_node, n_threads);
+ t = build4 (OMP_PARALLEL, void_type_node, NULL_TREE, t,
+ loop_fn, data);
+
+ bsi_insert_after (&bsi, t, BSI_NEW_STMT);
+
+ /* Initialize NEW_DATA. */
+ if (data)
+ {
+ bsi = bsi_after_labels (bb);
- add_phi_arg (nphi, init, old_entry);
- type = TREE_TYPE (name);
- delta = fold_build2 (MULT_EXPR, type,
- build_int_cst (type, n_threads - 1),
- per_thread);
- delta = fold_build2 (MULT_EXPR, type, delta, unshare_expr (elt->to));
- new_init = fold_build2 (PLUS_EXPR, type, init, delta);
+ param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL_TREE);
+ t = build2 (MODIFY_EXPR, void_type_node, param, build_fold_addr_expr (data));
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ SSA_NAME_DEF_STMT (param) = t;
- new_init = force_gimple_operand (new_init, &stmts, true, NULL_TREE);
- if (stmts)
- bsi_insert_on_edge_immediate_loop (new_entry, stmts);
- add_phi_arg (nphi, new_init, new_entry);
+ t = build2 (MODIFY_EXPR, void_type_node, new_data,
+ fold_convert (TREE_TYPE (new_data), param));
+ bsi_insert_before (&bsi, t, BSI_SAME_STMT);
+ SSA_NAME_DEF_STMT (new_data) = t;
}
-}
-/* If PAR is true, emit the code on E to finalize the threads. */
+ /* Emit OMP_RETURN for OMP_PARALLEL. */
+ bb = split_loop_exit_edge (single_dom_exit (loop));
+ bsi = bsi_last (bb);
+ bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
+
+ /* Extract data for OMP_FOR. */
+ gcc_assert (loop->header == single_dom_exit (loop)->src);
+ cond = COND_EXPR_COND (last_stmt (loop->header));
+
+ cvar = TREE_OPERAND (cond, 0);
+ cvar_base = SSA_NAME_VAR (cvar);
+ phi = SSA_NAME_DEF_STMT (cvar);
+ cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
+ initvar = make_ssa_name (cvar_base, NULL_TREE);
+ SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
+ initvar);
+ cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+
+ bsi = bsi_last (loop->latch);
+ gcc_assert (bsi_stmt (bsi) == SSA_NAME_DEF_STMT (cvar_next));
+ bsi_remove (&bsi, true);
-static void
-finalize_threads (edge e, tree par)
-{
- basic_block cond_bb = loop_split_edge_with (e, NULL);
- basic_block fin_bb = loop_split_edge_with (single_succ_edge (cond_bb), NULL);
- basic_block cont_bb = single_succ (fin_bb);
- block_stmt_iterator bsi = bsi_last (cond_bb);
- tree decl = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
- tree stmt = build_function_call_expr (decl, NULL);
- edge te, fe, be;
- tree phi;
+ /* Prepare cfg. */
+ for_bb = loop_split_edge_with (loop_preheader_edge (loop), NULL);
+ ex_bb = split_loop_exit_edge (single_dom_exit (loop));
+ extract_true_false_edges_from_block (loop->header, &nexit, &exit);
+ gcc_assert (exit == single_dom_exit (loop));
- bsi_insert_after (&bsi,
- build3 (COND_EXPR, void_type_node,
- par,
- build1 (GOTO_EXPR, void_type_node,
- tree_block_label (fin_bb)),
- build1 (GOTO_EXPR, void_type_node,
- tree_block_label (cont_bb))),
- BSI_NEW_STMT);
- te = single_succ_edge (cond_bb);
- te->flags = EDGE_TRUE_VALUE;
- be = single_succ_edge (fin_bb);
- fe = make_edge (cond_bb, cont_bb, EDGE_FALSE_VALUE);
-
- for (phi = phi_nodes (cont_bb); phi != NULL_TREE; phi = PHI_CHAIN (phi))
- add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (phi, be), fe);
-
- set_immediate_dominator (CDI_DOMINATORS, cont_bb,
- recount_dominator (CDI_DOMINATORS, cont_bb));
- bsi = bsi_last (fin_bb);
- mark_virtual_ops_for_renaming (stmt);
- bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+ guard = make_edge (for_bb, ex_bb, 0);
+ single_succ_edge (loop->latch)->flags = 0;
+ end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
+ for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
+ {
+ res = PHI_RESULT (phi);
+ gcc_assert (!is_gimple_reg (phi));
+ t = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
+ add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_preheader_edge (loop)),
+ guard);
+ add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_latch_edge (loop)),
+ end);
+ }
+ e = redirect_edge_and_branch (exit, nexit->dest);
+ PENDING_STMT (e) = NULL;
+
+ /* Emit OMP_FOR. */
+ TREE_OPERAND (cond, 0) = cvar_base;
+ type = TREE_TYPE (cvar);
+ t = build_omp_clause (OMP_CLAUSE_SCHEDULE);
+ OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
+ t = build6 (OMP_FOR, void_type_node, NULL_TREE, t,
+ build2 (MODIFY_EXPR, void_type_node, initvar, cvar_init),
+ cond,
+ build2 (MODIFY_EXPR, void_type_node,
+ cvar_base,
+ build2 (PLUS_EXPR, type,
+ cvar_base,
+ build_int_cst (type, 1))),
+ NULL_TREE);
+ bsi = bsi_last (for_bb);
+ bsi_insert_after (&bsi, t, BSI_NEW_STMT);
+ SSA_NAME_DEF_STMT (initvar) = t;
+
+ /* Emit OMP_CONTINUE. */
+ bsi = bsi_last (loop->latch);
+ t = build2 (OMP_CONTINUE, void_type_node, cvar_next, cvar);
+ bsi_insert_after (&bsi, t, BSI_NEW_STMT);
+ SSA_NAME_DEF_STMT (cvar_next) = t;
+
+ /* Emit OMP_RETURN for OMP_FOR. */
+ bsi = bsi_last (ex_bb);
+ bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
+
+ return paral_bb;
}
/* Generates code to execute the iterations of LOOP in N_THREADS threads in
@@ -1096,11 +972,9 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads,
struct tree_niter_desc *niter)
{
struct loop *nloop;
- tree many_iterations_cond, type, per_thread, new_per_thread;
- tree stmts, arg_struct, new_arg_struct, loop_fn, par_flag, phi;
- basic_block repl_bb, npre;
- htab_t steps, name_copies;
- edge orig_entry;
+ tree many_iterations_cond, type, nit;
+ tree stmts, arg_struct, new_arg_struct;
+ basic_block parallel_head;
/* From
@@ -1119,20 +993,24 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads,
we generate the following code:
---------------------------------------------------------------------
- parallelized = false;
if (MAY_BE_ZERO
|| NITER < MIN_PER_THREAD * N_THREADS)
- goto rest;
-
- per_thread = NITER / N_THREADS;
- store all local loop-invariant variables (including per_thread in case it
- is not a constant) used in body of the loop to DATA.
- __builtin_GOMP_parallel_start (loop_fn, &DATA, N_THREADS);
- INIT += STEP * per_thread * (N_THREADS - 1);
- parallelized = true;
-
- rest:
+ goto original;
+
+ BODY1;
+ store all local loop-invariant variables used in body of the loop to DATA.
+ OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
+ load the variables from DATA.
+ OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
+ BODY2;
+ BODY1;
+ OMP_CONTINUE;
+ OMP_RETURN -- OMP_FOR
+ OMP_RETURN -- OMP_PARALLEL
+ goto end;
+
+ original:
loop
{
IV = phi (INIT, IV + STEP)
@@ -1142,53 +1020,31 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads,
BODY2;
}
- if (parallelized)
- __builtin_GOMP_parallel_end ();
- ---------------------------------------------------------------------
-
- With the function
+ end:
- ---------------------------------------------------------------------
- void loop_fn (void *data)
- {
- load all local loop-invariant variables used in body of the loop
- from data
- INIT += STEP * per_thread * (thread_id - 1);
-
- loop
- {
- IV = phi (INIT, IV + STEP)
- NEW_CTR = phi (0, NEW_CTR');
- BODY1;
- BODY2;
- NEW_CTR' = NEW_CTR + 1;
- if (NEW_CTR' == per_thread)
- return;
- }
- }
- ---------------------------------------------------------------------
*/
- /* Record the steps of the induction variables. */
- record_steps (loop, &steps);
-
/* Create two versions of the loop -- in the old one, we know that the
number of iterations is large enough, and we will transform it into the
loop that will be split to loop_fn, the new one will be used for the
remaining iterations. */
type = TREE_TYPE (niter->niter);
+ nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
+ NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+
many_iterations_cond =
fold_build2 (GE_EXPR, boolean_type_node,
- niter->niter,
- build_int_cst (type, MIN_PER_THREAD * n_threads));
+ nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
many_iterations_cond
= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- invert_truthvalue (niter->may_be_zero),
+ invert_truthvalue (unshare_expr (niter->may_be_zero)),
many_iterations_cond);
many_iterations_cond
- = force_gimple_operand (unshare_expr (many_iterations_cond),
- &stmts, false, NULL_TREE);
+ = force_gimple_operand (many_iterations_cond, &stmts,
+ false, NULL_TREE);
if (stmts)
bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
if (!is_gimple_condexpr (many_iterations_cond))
@@ -1205,74 +1061,34 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads,
update_ssa (TODO_update_ssa);
free_original_copy_tables ();
- /* Compute number of iterations per thread. */
- per_thread = fold_build2 (FLOOR_DIV_EXPR, type, niter->niter,
- build_int_cst (type, n_threads));
- per_thread = force_gimple_operand (unshare_expr (per_thread), &stmts, true,
- NULL_TREE);
- if (stmts)
- bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ /* Base all the induction variables in LOOP on a single control one. */
+ canonicalize_loop_ivs (loop, nit);
+
+ /* Ensure that the exit condition is the first statement in the loop. */
+ transform_to_exit_first_loop (loop, nit);
/* Eliminate the references to local variables from the loop. */
eliminate_local_variables (loop);
/* In the old loop, move all variables non-local to the loop to a structure
and back, and create separate decls for the variables used in loop. */
- new_per_thread = per_thread;
- separate_decls_in_loop (loop, &new_per_thread, &arg_struct, &new_arg_struct,
- &name_copies);
-
- /* Record the steps of induction variables in the copy, and remap those
- in the original loop. */
- record_and_update_steps (loop, nloop, steps, name_copies);
-
- /* Update the initial values of induction variables. */
- shift_ivs_for_iteration (loop, new_per_thread, steps);
+ separate_decls_in_loop (loop, &arg_struct, &new_arg_struct);
- /* Add the new exit condition. */
- change_exit_condition (loop, new_per_thread);
+ /* Create the parallel constructs. */
+ parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
+ new_arg_struct, n_threads);
- /* Split the loop to the new function. */
- repl_bb = extract_loop_to_function (loop, new_arg_struct, &loop_fn);
- cgraph_add_new_function (loop_fn);
-
- /* Call the builtin to run the function in several threads. */
- call_in_parallel (bsi_last (repl_bb), loop_fn, arg_struct, n_threads);
-
- /* Redirect the exit edge to the versioned copy, and set the parallelized
- flag. */
- par_flag = create_tmp_var (boolean_type_node, "parallelized");
- add_referenced_var (par_flag);
-
- npre = loop_preheader_edge (nloop)->src;
- gcc_assert (phi_nodes (npre) == NULL);
-
- orig_entry = single_pred_edge (npre);
- redirect_edge_and_branch (single_succ_edge (repl_bb),
- loop_preheader_edge (nloop)->src);
- phi = create_phi_node (par_flag, npre);
- add_phi_arg (phi, boolean_false_node, orig_entry);
- add_phi_arg (phi, boolean_true_node, single_succ_edge (repl_bb));
- par_flag = PHI_RESULT (phi);
-
- /* Update the initial values of induction variables in the versioned
- copy. */
- shift_ivs_for_remaining_iterations (nloop, per_thread, n_threads,
- single_succ_edge (repl_bb), steps);
-
- /* Finalize the threads after the loop if necessary. */
- finalize_threads (nloop->single_exit, par_flag);
-
- htab_delete (name_copies);
- htab_delete (steps);
scev_reset ();
- /* We created a new call clobbered variable. This means every call in the
- function gets a new virtual operand. However, we cannot rerun alias
- analysis after vectorizer (or at least, passes.c claims so), thus we
- must mark them for renaming manually. */
- mark_call_virtual_operands ();
- update_ssa (TODO_update_ssa_only_virtuals);
+ /* Cancel the loop (it is simpler to do it here rather than to teach the
+ expander to do it). */
+ cancel_loop_tree (current_loops, loop);
+
+ /* Expand the parallel constructs. We do it directly here instead of running
+ a separate expand_omp pass, since it is more efficient, and less likely to
+ cause troubles with further analyses not being able to deal with the
+ OMP trees. */
+ omp_expand_local (parallel_head);
}
/* Detect parallel loops and generate parallel code using libgomp
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index 0b29c5ee8b4..f20be11630e 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -439,9 +439,9 @@ verify_loop_closed_ssa (void)
}
/* Split loop exit edge EXIT. The things are a bit complicated by a need to
- preserve the loop closed ssa form. */
+ preserve the loop closed ssa form. The new block is returned. */
-void
+basic_block
split_loop_exit_edge (edge exit)
{
basic_block dest = exit->dest;
@@ -468,6 +468,8 @@ split_loop_exit_edge (edge exit)
add_phi_arg (new_phi, name, exit);
SET_USE (op_p, new_name);
}
+
+ return bb;
}
/* Insert statement STMT to the edge E and update the loop structures.
diff --git a/gcc/tree.c b/gcc/tree.c
index cfbfe1462b6..4b5103b9cda 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -3095,6 +3095,35 @@ build5_stat (enum tree_code code, tree tt, tree arg0, tree arg1,
}
tree
+build6_stat (enum tree_code code, tree tt, tree arg0, tree arg1,
+ tree arg2, tree arg3, tree arg4, tree arg5 MEM_STAT_DECL)
+{
+ bool constant, read_only, side_effects, invariant;
+ tree t;
+
+ gcc_assert (TREE_CODE_LENGTH (code) == 6);
+
+ t = make_node_stat (code PASS_MEM_STAT);
+ TREE_TYPE (t) = tt;
+
+ side_effects = TREE_SIDE_EFFECTS (t);
+
+ PROCESS_ARG(0);
+ PROCESS_ARG(1);
+ PROCESS_ARG(2);
+ PROCESS_ARG(3);
+ PROCESS_ARG(4);
+ PROCESS_ARG(5);
+
+ TREE_SIDE_EFFECTS (t) = side_effects;
+ TREE_THIS_VOLATILE (t)
+ = (TREE_CODE_CLASS (code) == tcc_reference
+ && arg0 && TREE_THIS_VOLATILE (arg0));
+
+ return t;
+}
+
+tree
build7_stat (enum tree_code code, tree tt, tree arg0, tree arg1,
tree arg2, tree arg3, tree arg4, tree arg5,
tree arg6 MEM_STAT_DECL)
diff --git a/gcc/tree.h b/gcc/tree.h
index 9da85d74292..13f0decb91f 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3532,6 +3532,9 @@ extern tree build4_stat (enum tree_code, tree, tree, tree, tree,
extern tree build5_stat (enum tree_code, tree, tree, tree, tree, tree,
tree MEM_STAT_DECL);
#define build5(c,t1,t2,t3,t4,t5,t6) build5_stat (c,t1,t2,t3,t4,t5,t6 MEM_STAT_INFO)
+extern tree build6_stat (enum tree_code, tree, tree, tree, tree, tree, tree,
+ tree MEM_STAT_DECL);
+#define build6(c,t1,t2,t3,t4,t5,t6,t7) build6_stat (c,t1,t2,t3,t4,t5,t6, t7 MEM_STAT_INFO)
extern tree build7_stat (enum tree_code, tree, tree, tree, tree, tree,
tree, tree, tree MEM_STAT_DECL);
#define build7(c,t1,t2,t3,t4,t5,t6,t7,t8) \