diff options
author | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-10-24 22:14:34 +0000 |
---|---|---|
committer | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-10-24 22:14:34 +0000 |
commit | edadcaed23701a51d8164925740e028075f578b9 (patch) | |
tree | c80208991820bbaa2447e9c38aacc692b11cc9e6 | |
parent | ef7ff294516ac28878a603464304eba0290f8d24 (diff) |
* tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new block.parloop
* tree.c (build6_stat): New function.
* tree.h (build6_stat): Declare.
(build6): New macro.
* omp-low.c: Include cfgloop.h.
(mark_call_virtual_operands): Moved from tree-parloop.c.
(expand_omp_parallel, expand_omp_for_static_nochunk): Update loops.
(build_omp_regions_1): Enable finding the regions locally.
(build_omp_regions_root, omp_expand_local): New functions.
(build_omp_regions): Pass new argument to build_omp_regions_1.
* tree-parloops.c (loop_parallel_p): Require that there are no
loop exit phi nodes, and that the loop can be duplicated.
(mark_virtual_ops_for_renaming_list): Removed.
(mark_call_virtual_operands): Moved to omp-low.c.
(separate_decls_in_loop_name): Handle the case of non-SSA name
argument.
(separate_decls_in_loop): Do not return name_copies hashtable.
(shift_ivs_for_iteration, record_steps, remap_ssa_names,
record_and_update_steps, change_exit_condition,
extract_loop_to_function, call_in_parallel,
shift_ivs_for_remaining_iterations, finalize_threads): Removed.
(canonicalize_loop_ivs, transform_to_exit_first_loop,
create_parallel_loop): New functions.
(gen_parallel_loop): Use OMP tree codes.
* tree-flow.h (omp_expand_local, tree_duplicate_sese_tail): Declare.
(mark_call_virtual_operands, add_phi_args_after_copy): Declaration
removed.
(split_loop_exit_edge): Declaration changed.
* Makefile.in (omp-low.o): Add CFGLOOP_H dependency.
* tree-cfg.c (add_phi_args_after_copy_edge): New function.
(add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge.
(add_phi_args_after_copy): Made static. Also copy arguments on
one additional edge.
(tree_duplicate_sese_region): Fix updating of frequencies.
Add argument to add_phi_args_after_copy call.
(tree_duplicate_sese_tail): New function.
(move_sese_region_to_fn): Update loops.
* testsuite/gcc.dg/tree-ssa/parallelization-1.c: Update outcome.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/parloop@118013 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.parloop | 42 | ||||
-rw-r--r-- | gcc/Makefile.in | 2 | ||||
-rw-r--r-- | gcc/omp-low.c | 74 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c | 2 | ||||
-rw-r--r-- | gcc/tree-cfg.c | 250 | ||||
-rw-r--r-- | gcc/tree-flow.h | 9 | ||||
-rw-r--r-- | gcc/tree-parloops.c | 820 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-manip.c | 6 | ||||
-rw-r--r-- | gcc/tree.c | 29 | ||||
-rw-r--r-- | gcc/tree.h | 3 |
10 files changed, 681 insertions, 556 deletions
diff --git a/gcc/ChangeLog.parloop b/gcc/ChangeLog.parloop index 45a13b36027..a806774d19c 100644 --- a/gcc/ChangeLog.parloop +++ b/gcc/ChangeLog.parloop @@ -1,3 +1,45 @@ +2006-10-25 Zdenek Dvorak <dvorakz@suse.cz> + + * tree-ssa-loop-manip.c (split_loop_exit_edge): Return the new block. + * tree.c (build6_stat): New function. + * tree.h (build6_stat): Declare. + (build6): New macro. + * omp-low.c: Include cfgloop.h. + (mark_call_virtual_operands): Moved from tree-parloop.c. + (expand_omp_parallel, expand_omp_for_static_nochunk): Update loops. + (build_omp_regions_1): Enable finding the regions locally. + (build_omp_regions_root, omp_expand_local): New functions. + (build_omp_regions): Pass new argument to build_omp_regions_1. + * tree-parloops.c (loop_parallel_p): Require that there are no + loop exit phi nodes, and that the loop can be duplicated. + (mark_virtual_ops_for_renaming_list): Removed. + (mark_call_virtual_operands): Moved to omp-low.c. + (separate_decls_in_loop_name): Handle the case of non-SSA name + argument. + (separate_decls_in_loop): Do not return name_copies hashtable. + (shift_ivs_for_iteration, record_steps, remap_ssa_names, + record_and_update_steps, change_exit_condition, + extract_loop_to_function, call_in_parallel, + shift_ivs_for_remaining_iterations, finalize_threads): Removed. + (canonicalize_loop_ivs, transform_to_exit_first_loop, + create_parallel_loop): New functions. + (gen_parallel_loop): Use OMP tree codes. + * tree-flow.h (omp_expand_local, tree_duplicate_sese_tail): Declare. + (mark_call_virtual_operands, add_phi_args_after_copy): Declaration + removed. + (split_loop_exit_edge): Declaration changed. + * Makefile.in (omp-low.o): Add CFGLOOP_H dependency. + * tree-cfg.c (add_phi_args_after_copy_edge): New function. + (add_phi_args_after_copy_bb): Use add_phi_args_after_copy_edge. + (add_phi_args_after_copy): Made static. Also copy arguments on + one additional edge. + (tree_duplicate_sese_region): Fix updating of frequencies. + Add argument to add_phi_args_after_copy call. + (tree_duplicate_sese_tail): New function. + (move_sese_region_to_fn): Update loops. + + * testsuite/gcc.dg/tree-ssa/parallelization-1.c: Update outcome. + 2006-10-16 Zdenek Dvorak <dvorakz@suse.cz> * tree-pretty-print.c (dump_generic_node): Handle OMP_SECTIONS_SWITCH diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 97d5abd8664..afa9f68975a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2065,7 +2065,7 @@ gimple-low.o : gimple-low.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \ omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(TREE_GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TIMEVAR_H) $(FLAGS_H) $(EXPR_H) toplev.h tree-pass.h \ - $(GGC_H) + $(GGC_H) $(CFGLOOP_H) tree-browser.o : tree-browser.c tree-browser.def $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TREE_INLINE_H) $(DIAGNOSTIC_H) $(HASHTAB_H) \ $(TM_H) coretypes.h diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 6dde9eacc89..c3bf19e325a 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -41,7 +41,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-pass.h" #include "ggc.h" #include "except.h" - +#include "cfgloop.h" /* Lowering of OpenMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -2573,6 +2573,8 @@ expand_omp_parallel (struct omp_region *region) gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL); bsi_remove (&si, true); e = split_block (entry_bb, t); + if (current_loops) + add_bb_to_loop (e->dest, e->src->loop_father); entry_bb = e->dest; single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; @@ -2606,6 +2608,26 @@ expand_omp_parallel (struct omp_region *region) update_ssa (TODO_update_ssa_only_virtuals); } +/* Marks operands of calls for renaming. */ + +static void +mark_call_virtual_operands (void) +{ + basic_block bb; + block_stmt_iterator bsi; + tree stmt; + + FOR_EACH_BB (bb) + { + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + stmt = bsi_stmt (bsi); + if (get_call_expr_in (stmt) != NULL_TREE) + mark_new_vars_to_rename (stmt); + } + } +} + /* A subroutine of expand_omp_for. Generate code for a parallel loop with any schedule. Given parameters: @@ -2860,7 +2882,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, cont_bb = region->cont; gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + + if (current_loops) + seq_start_bb = loop_split_edge_with (FALLTHRU_EDGE (entry_bb), NULL); + else + seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + body_bb = single_succ (seq_start_bb); gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); @@ -3598,10 +3625,13 @@ expand_omp (struct omp_region *region) /* Helper for build_omp_regions. Scan the dominator tree starting at - block BB. PARENT is the region that contains BB. */ + block BB. PARENT is the region that contains BB. If SINGLE_TREE is + true, the function ends once a single tree is built (otherwise, whole + forest of OMP constructs may be built). */ static void -build_omp_regions_1 (basic_block bb, struct omp_region *parent) +build_omp_regions_1 (basic_block bb, struct omp_region *parent, + bool single_tree) { block_stmt_iterator si; tree stmt; @@ -3650,12 +3680,44 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent) } } + if (single_tree && !parent) + return; + for (son = first_dom_son (CDI_DOMINATORS, bb); son; son = next_dom_son (CDI_DOMINATORS, son)) - build_omp_regions_1 (son, parent); + build_omp_regions_1 (son, parent, single_tree); } +/* Builds the tree of OMP regions rooted at ROOT, storing it to + root_omp_region. */ + +static void +build_omp_regions_root (basic_block root) +{ + gcc_assert (root_omp_region == NULL); + build_omp_regions_1 (root, NULL, true); + gcc_assert (root_omp_region != NULL); +} + +/* Expands piece of parallel code starting in HEAD. */ + +void +omp_expand_local (basic_block head) +{ + build_omp_regions_root (head); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + expand_omp (root_omp_region); + + free_omp_regions (); +} /* Scan the CFG and build a tree of OMP regions. Return the root of the OMP region tree. */ @@ -3665,7 +3727,7 @@ build_omp_regions (void) { gcc_assert (root_omp_region == NULL); calculate_dominance_info (CDI_DOMINATORS); - build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL); + build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false); } /* Main entry point for expanding OMP-GIMPLE into runtime calls. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c b/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c index 493f34881bb..ee62ff666b8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/parallelization-1.c @@ -28,6 +28,6 @@ int main(void) /* Check that the first loop in parloop got parallelized. */ /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */ -/* { dg-final { scan-tree-dump-times "loopfn" 4 "final_cleanup" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 5 "final_cleanup" } } */ /* { dg-final { cleanup-tree-dump "parloops" } } */ /* { dg-final { cleanup-tree-dump "final_cleanup" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 0ac813ae725..105d5d4ce03 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4342,6 +4342,51 @@ tree_duplicate_bb (basic_block bb) return new_bb; } +/* Adds phi node arguments for edge E_COPY after basic block duplication. */ + +static void +add_phi_args_after_copy_edge (edge e_copy) +{ + basic_block bb, bb_copy = e_copy->src, dest; + edge e; + edge_iterator ei; + tree phi, phi_copy, phi_next, def; + + if (!phi_nodes (e_copy->dest)) + return; + + bb = bb_copy->flags & BB_DUPLICATED ? get_bb_original (bb_copy) : bb_copy; + + if (e_copy->dest->flags & BB_DUPLICATED) + dest = get_bb_original (e_copy->dest); + else + dest = e_copy->dest; + + e = find_edge (bb, dest); + if (!e) + { + /* During loop unrolling the target of the latch edge is copied. + In this case we are not looking for edge to dest, but to + duplicated block whose original was dest. */ + FOR_EACH_EDGE (e, ei, bb->succs) + { + if ((e->dest->flags & BB_DUPLICATED) + && get_bb_original (e->dest) == dest) + break; + } + + gcc_assert (e != NULL); + } + + for (phi = phi_nodes (e->dest), phi_copy = phi_nodes (e_copy->dest); + phi; + phi = phi_next, phi_copy = PHI_CHAIN (phi_copy)) + { + phi_next = PHI_CHAIN (phi); + def = PHI_ARG_DEF_FROM_EDGE (phi, e); + add_phi_arg (phi_copy, def, e_copy); + } +} /* Basic block BB_COPY was created by code duplication. Add phi node arguments for edges going out of BB_COPY. The blocks that were @@ -4350,54 +4395,23 @@ tree_duplicate_bb (basic_block bb) void add_phi_args_after_copy_bb (basic_block bb_copy) { - basic_block bb, dest; - edge e, e_copy; edge_iterator ei; - tree phi, phi_copy, phi_next, def; - - bb = get_bb_original (bb_copy); + edge e_copy; FOR_EACH_EDGE (e_copy, ei, bb_copy->succs) { - if (!phi_nodes (e_copy->dest)) - continue; - - if (e_copy->dest->flags & BB_DUPLICATED) - dest = get_bb_original (e_copy->dest); - else - dest = e_copy->dest; - - e = find_edge (bb, dest); - if (!e) - { - /* During loop unrolling the target of the latch edge is copied. - In this case we are not looking for edge to dest, but to - duplicated block whose original was dest. */ - FOR_EACH_EDGE (e, ei, bb->succs) - if ((e->dest->flags & BB_DUPLICATED) - && get_bb_original (e->dest) == dest) - break; - - gcc_assert (e != NULL); - } - - for (phi = phi_nodes (e->dest), phi_copy = phi_nodes (e_copy->dest); - phi; - phi = phi_next, phi_copy = PHI_CHAIN (phi_copy)) - { - phi_next = PHI_CHAIN (phi); - def = PHI_ARG_DEF_FROM_EDGE (phi, e); - add_phi_arg (phi_copy, def, e_copy); - } + add_phi_args_after_copy_edge (e_copy); } } /* Blocks in REGION_COPY array of length N_REGION were created by duplication of basic blocks. Add phi node arguments for edges - going from these blocks. */ + going from these blocks. If E_COPY is not NULL, also add + phi node arguments for its destination. */ -void -add_phi_args_after_copy (basic_block *region_copy, unsigned n_region) +static void +add_phi_args_after_copy (basic_block *region_copy, unsigned n_region, + edge e_copy) { unsigned i; @@ -4406,6 +4420,8 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region) for (i = 0; i < n_region; i++) add_phi_args_after_copy_bb (region_copy[i]); + if (e_copy) + add_phi_args_after_copy_edge (e_copy); for (i = 0; i < n_region; i++) region_copy[i]->flags &= ~BB_DUPLICATED; @@ -4504,7 +4520,7 @@ tree_duplicate_sese_region (edge entry, edge exit, frequencies. */ if (total_freq == 0) total_freq = 1; - else if (entry_freq > total_freq) + if (entry_freq > total_freq) entry_freq = total_freq; } @@ -4546,7 +4562,154 @@ tree_duplicate_sese_region (edge entry, edge exit, free (doms); /* Add the other PHI node arguments. */ - add_phi_args_after_copy (region_copy, n_region); + add_phi_args_after_copy (region_copy, n_region, NULL); + + /* Update the SSA web. */ + update_ssa (TODO_update_ssa); + + if (free_region_copy) + free (region_copy); + + free_original_copy_tables (); + return true; +} + +/* Duplicates REGION consisting of N_REGION blocks. The new blocks + are stored to REGION_COPY in the same order in that they appear + in REGION, if REGION_COPY is not NULL. ENTRY is the entry to + the region, EXIT an exit from it. The condition guarding EXIT + is moved to ENTRY. Returns true if duplication succeeds, false + otherwise. */ + +bool +tree_duplicate_sese_tail (edge entry, edge exit, + basic_block *region, unsigned n_region, + basic_block *region_copy) +{ + unsigned i, n_doms; + bool free_region_copy = false; + struct loop *loop = exit->dest->loop_father; + struct loop *orig_loop = entry->dest->loop_father; + basic_block *doms, switch_bb, entry_bb, nentry_bb; + int total_freq = 0, exit_freq = 0; + gcov_type total_count = 0, exit_count = 0; + edge exits[2], nexits[2], e; + block_stmt_iterator bsi; + tree cond; + edge sorig, snew, single_exit = orig_loop->single_exit; + + gcc_assert (EDGE_COUNT (exit->src->succs) == 2); + exits[0] = exit; + exits[1] = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit); + + if (!can_copy_bbs_p (region, n_region)) + return false; + + /* Some sanity checking. Note that we do not check for all possible + missuses of the functions. I.e. if you ask to copy something weird, + it will work, but the state of structures probably will not be + correct. */ + for (i = 0; i < n_region; i++) + { + /* We do not handle subloops, i.e. all the blocks must belong to the + same loop. */ + if (region[i]->loop_father != orig_loop) + return false; + + if (region[i] == orig_loop->latch) + return false; + } + + orig_loop->copy = loop; + + if (!region_copy) + { + region_copy = XNEWVEC (basic_block, n_region); + free_region_copy = true; + } + + gcc_assert (!need_ssa_update_p ()); + + /* Record blocks outside the region that are dominated by something + inside. */ + doms = XNEWVEC (basic_block, n_basic_blocks); + initialize_original_copy_tables (); + + n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms); + + if (exit->src->count) + { + total_count = exit->src->count; + exit_count = exit->count; + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. */ + if (exit_count > total_count) + exit_count = total_count; + } + else + { + total_freq = exit->src->frequency; + exit_freq = EDGE_FREQUENCY (exit); + /* Fix up corner cases, to avoid division by zero or creation of negative + frequencies. */ + if (total_freq == 0) + total_freq = 1; + if (exit_freq > total_freq) + exit_freq = total_freq; + } + + copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop, + split_edge_bb_loc (exit)); + if (total_count) + { + scale_bbs_frequencies_gcov_type (region, n_region, + total_count - exit_count, + total_count); + scale_bbs_frequencies_gcov_type (region_copy, n_region, exit_count, + total_count); + } + else + { + scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq, + total_freq); + scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq); + } + + /* Create the switch block, and put the exit condition to it. */ + entry_bb = entry->dest; + nentry_bb = get_bb_copy (entry_bb); + if (!last_stmt (entry->src) + || !stmt_ends_bb_p (last_stmt (entry->src))) + switch_bb = entry->src; + else + switch_bb = loop_split_edge_with (entry, NULL); + set_immediate_dominator (CDI_DOMINATORS, nentry_bb, switch_bb); + + bsi = bsi_last (switch_bb); + cond = last_stmt (exit->src); + gcc_assert (TREE_CODE (cond) == COND_EXPR); + bsi_insert_after (&bsi, unshare_expr (cond), BSI_NEW_STMT); + + sorig = single_succ_edge (switch_bb); + sorig->flags = exits[1]->flags; + snew = make_edge (switch_bb, nentry_bb, exits[0]->flags); + if (exits[0] == single_exit) + orig_loop->single_exit = snew; + + /* Add the PHI node arguments. */ + add_phi_args_after_copy (region_copy, n_region, snew); + + /* Get rid of now superfluous conditions and associated edges (and phi node + arguments). */ + e = redirect_edge_and_branch (exits[0], exits[1]->dest); + PENDING_STMT (e) = NULL_TREE; + e = redirect_edge_and_branch (nexits[1], nexits[0]->dest); + PENDING_STMT (e) = NULL_TREE; + + /* Anything that is outside of the region, but was dominated by something + inside needs to update dominance info. */ + iterate_fix_dominators (CDI_DOMINATORS, doms, n_doms); + free (doms); /* Update the SSA web. */ update_ssa (TODO_update_ssa); @@ -4798,8 +4961,10 @@ move_block_to_fn (struct function *dest_cfun, basic_block bb, basic_block *addr; tree phi; - /* Remove BB from dominance structures. */ + /* Remove BB from dominance and loop structures. */ delete_from_dominance_info (CDI_DOMINATORS, bb); + if (current_loops) + remove_bb_from_loops (bb); /* Link BB to the new linked list. */ move_block_after (bb, after); @@ -5001,6 +5166,7 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, edge e; edge_iterator ei; htab_t new_label_map, vars_map; + struct loop *loop = entry_bb->loop_father; saved_cfun = cfun; @@ -5134,6 +5300,8 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, /* Back in the original function, the SESE region has disappeared, create a new basic block in its place. */ bb = create_empty_bb (entry_pred[0]); + if (current_loops) + add_bb_to_loop (bb, loop); for (i = 0; i < num_entry_edges; i++) { e = make_edge (entry_pred[i], bb, entry_flag[i]); diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index b24f3b47521..a774ef78552 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -534,6 +534,8 @@ extern struct omp_region *root_omp_region; extern struct omp_region *new_omp_region (basic_block, enum tree_code, struct omp_region *); extern void free_omp_regions (void); +void omp_expand_local (basic_block); + tree copy_var_decl (tree, tree, tree); tree find_omp_clause (tree, enum tree_code); @@ -582,8 +584,9 @@ extern tree tree_block_label (basic_block); extern void extract_true_false_edges_from_block (basic_block, edge *, edge *); extern bool tree_duplicate_sese_region (edge, edge, basic_block *, unsigned, basic_block *); +extern bool tree_duplicate_sese_tail (edge, edge, basic_block *, unsigned, + basic_block *); extern void add_phi_args_after_copy_bb (basic_block); -extern void add_phi_args_after_copy (basic_block *, unsigned); extern bool tree_purge_dead_eh_edges (basic_block); extern bool tree_purge_all_dead_eh_edges (bitmap); extern tree gimplify_val (block_stmt_iterator *, tree, tree); @@ -626,7 +629,6 @@ extern void add_referenced_var (tree); extern void mark_new_vars_to_rename (tree); extern void find_new_referenced_vars (tree *); void mark_virtual_ops_for_renaming (tree); -void mark_call_virtual_operands (void); extern tree make_rename_temp (tree, const char *); extern void set_default_def (tree, tree); @@ -819,7 +821,7 @@ void loop_commit_inserts (void); bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *); void create_iv (tree, tree, tree, struct loop *, block_stmt_iterator *, bool, tree *, tree *); -void split_loop_exit_edge (edge); +basic_block split_loop_exit_edge (edge); unsigned force_expr_to_var_cost (tree); basic_block bsi_insert_on_edge_immediate_loop (edge, tree); void standard_iv_increment_position (struct loop *, block_stmt_iterator *, @@ -1016,4 +1018,5 @@ void swap_tree_operands (tree, tree *, tree *); extern void recalculate_used_alone (void); extern bool updating_used_alone; + #endif /* _TREE_FLOW_H */ diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 36ec003a67b..acf0881adab 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -121,8 +121,7 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter) { tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit); - if (is_gimple_reg (val) - && !expr_invariant_in_loop_p (loop, val)) + if (is_gimple_reg (val)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " FAILED: value used outside loop\n"); @@ -146,11 +145,18 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter) } } - datarefs = VEC_alloc (data_reference_p, heap, 10); - dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); + /* We need to version the loop to verify assumptions in runtime. */ + if (!can_duplicate_loop_p (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " FAILED: cannot be duplicated\n"); + return false; + } /* Check for problems with dependences. If the loop can be reversed, the iterations are indepent. */ + datarefs = VEC_alloc (data_reference_p, heap, 10); + dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -174,37 +180,6 @@ loop_parallel_p (struct loop *loop, struct tree_niter_desc *niter) return ret; } -/* Calls mark_virtual_ops_for_renaming for all members of LIST. */ - -static void -mark_virtual_ops_for_renaming_list (tree list) -{ - tree_stmt_iterator tsi; - - for (tsi = tsi_start (list); !tsi_end_p (tsi); tsi_next (&tsi)) - mark_virtual_ops_for_renaming (tsi_stmt (tsi)); -} - -/* Marks operands of calls for renaming. */ - -void -mark_call_virtual_operands (void) -{ - basic_block bb; - block_stmt_iterator bsi; - tree stmt; - - FOR_EACH_BB (bb) - { - for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) - { - stmt = bsi_stmt (bsi); - if (get_call_expr_in (stmt) != NULL_TREE) - mark_new_vars_to_rename (stmt); - } - } -} - /* Assigns the address of VAR in TYPE to an ssa name, and returns this name. The assignment statement is placed before LOOP. DECL_ADDRESS maps decls to their addresses that can be reused. */ @@ -377,6 +352,9 @@ separate_decls_in_loop_name (tree name, struct name_to_copy_elt elt, *nelt; void **slot, **dslot; + if (TREE_CODE (name) != SSA_NAME) + return name; + idx = SSA_NAME_VERSION (name); elt.version = idx; slot = htab_find_slot_with_hash (name_copies, &elt, idx, @@ -559,16 +537,16 @@ bb1: `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The pointer `new' is intentionally not initialized (the loop will be split to a separate function later, and `new' will be initialized from its arguments). - *PER_THREAD is updated to the ssa name to that the value is loaded in - bb1. A mapping of old to new copies is stored to *NAME_COPIES. */ + */ static void -separate_decls_in_loop (struct loop *loop, tree *per_thread, - tree *arg_struct, tree *new_arg_struct, - htab_t *name_copies) +separate_decls_in_loop (struct loop *loop, tree *arg_struct, + tree *new_arg_struct) { basic_block bb1 = loop_split_edge_with (loop_preheader_edge (loop), NULL); basic_block bb0 = single_pred (bb1); + htab_t name_copies = htab_create (10, name_to_copy_elt_hash, + name_to_copy_elt_eq, free); htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq, free); basic_block bb, *body = get_loop_body (loop); @@ -577,29 +555,21 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread, block_stmt_iterator bsi; struct clsn_data clsn_data; - *name_copies = htab_create (10, name_to_copy_elt_hash, - name_to_copy_elt_eq, free); - /* Find and rename the ssa names defined outside of loop. */ for (i = 0; i < loop->num_nodes; i++) { bb = body[i]; for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) - separate_decls_in_loop_stmt (loop, phi, *name_copies, decl_copies); + separate_decls_in_loop_stmt (loop, phi, name_copies, decl_copies); for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) - separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), *name_copies, + separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), name_copies, decl_copies); } - - if (TREE_CODE (*per_thread) == SSA_NAME) - *per_thread - = separate_decls_in_loop_name (*per_thread, *name_copies, decl_copies, - true); free (body); - if (htab_elements (*name_copies) == 0) + if (htab_elements (name_copies) == 0) { /* It may happen that there is nothing to copy (if there are only loop carried and external variables in the loop). */ @@ -614,14 +584,14 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread, type); TYPE_NAME (type) = type_name; - htab_traverse (*name_copies, add_field_for_name, type); + htab_traverse (name_copies, add_field_for_name, type); layout_type (type); /* Create the loads and stores. */ *arg_struct = create_tmp_var (type, ".paral_data_store"); - add_referenced_var(*arg_struct); + add_referenced_var (*arg_struct); nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load"); - add_referenced_var(nvar); + add_referenced_var (nvar); *new_arg_struct = make_ssa_name (nvar, NULL_TREE); /* We should mark *arg_struct call clobbered. However, this means @@ -633,209 +603,12 @@ separate_decls_in_loop (struct loop *loop, tree *per_thread, clsn_data.load = *new_arg_struct; clsn_data.store_bb = bb0; clsn_data.load_bb = bb1; - htab_traverse (*name_copies, create_loads_and_stores_for_name, + htab_traverse (name_copies, create_loads_and_stores_for_name, &clsn_data); } htab_delete (decl_copies); -} - -/* Replaces initial values of induction variables in LOOP to the start of the - PER_THREAD * (thread number - 1)-th iteration. STEPS is the hashtable that - contains steps of each induction variable. */ - -static void -shift_ivs_for_iteration (struct loop *loop, tree per_thread, htab_t steps) -{ - tree phi, name, step, ncopy, stmts, call, cdecl, type, init, delta, ninit; - edge entry = loop_preheader_edge (loop); - struct int_tree_map tem, *elt; - unsigned ver; - block_stmt_iterator bsi = bsi_last (entry->src); - use_operand_p init_p; - - for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) - { - name = PHI_RESULT (phi); - if (!is_gimple_reg (name)) - continue; - - ver = SSA_NAME_VERSION (name); - tem.uid = ver; - elt = htab_find_with_hash (steps, &tem, ver); - step = elt->to; - - init_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, entry); - init = USE_FROM_PTR (init_p); - type = TREE_TYPE (init); - cdecl = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; - call = fold_convert (type, build_function_call_expr (cdecl, NULL)); - ncopy = build2 (MINUS_EXPR, type, call, build_int_cst (type, 1)); - delta = fold_build2 (MULT_EXPR, type, unshare_expr (step), per_thread); - delta = fold_build2 (MULT_EXPR, type, delta, ncopy); - ninit = fold_build2 (PLUS_EXPR, type, init, delta); - - ninit = force_gimple_operand (ninit, &stmts, true, NULL_TREE); - if (stmts) - bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING); - - SET_USE (init_p, ninit); - } -} - -/* Records steps of induction variables of LOOP to STEPS. */ - -static void -record_steps (struct loop *loop, htab_t *steps) -{ - tree phi, name; - unsigned ver; - affine_iv iv; - bool ok; - void **slot; - struct int_tree_map tem, *elt; - - *steps = htab_create (10, int_tree_map_hash, int_tree_map_eq, free); - for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) - { - name = PHI_RESULT (phi); - if (!is_gimple_reg (name)) - continue; - - ver = SSA_NAME_VERSION (name); - ok = simple_iv (loop, phi, name, &iv, true); - gcc_assert (ok); - - tem.uid = ver; - slot = htab_find_slot_with_hash (*steps, &tem, ver, INSERT); - elt = XNEW (struct int_tree_map); - elt->uid = ver; - elt->to = iv.step; - *slot = elt; - } -} - -/* Remaps ssa names in EXPR according to NAME_COPIES, and returns the updated - expression. */ - -static tree -remap_ssa_names (tree expr, htab_t name_copies) -{ - struct name_to_copy_elt tem, *elt; - unsigned ver; - tree copied; - unsigned i, n; - tree nop; - - if (TREE_CODE (expr) == SSA_NAME) - { - ver = SSA_NAME_VERSION (expr); - tem.version = ver; - elt = htab_find_with_hash (name_copies, &tem, ver); - - return elt->new_name; - } - - if (!EXPR_P (expr)) - return unshare_expr (expr); - - copied = copy_node (expr); - n = TREE_CODE_LENGTH (TREE_CODE (expr)); - - for (i = 0; i < n; i++) - { - nop = remap_ssa_names (TREE_OPERAND (expr, i), name_copies); - TREE_OPERAND (copied, i) = nop; - } - - return copied; -} - -/* Records steps for induction variables of NLOOP in STEPS, and remap the steps - of induction variables in LOOP according to NAME_COPIES. */ - -static void -record_and_update_steps (struct loop *loop, struct loop *nloop, htab_t steps, - htab_t name_copies) -{ - tree phi, nphi, name, nname; - unsigned ver, nver; - void **slot; - struct int_tree_map tem, *elt, *nelt; - - /* The corresponding phi nodes in LOOP and in NLOOP should be in the same - order. */ - for (phi = phi_nodes (loop->header), nphi = phi_nodes (nloop->header); phi; - phi = PHI_CHAIN (phi), nphi = PHI_CHAIN (nphi)) - { - name = PHI_RESULT (phi); - nname = PHI_RESULT (nphi); - - if (!is_gimple_reg (name)) - { - gcc_assert (!is_gimple_reg (nname)); - continue; - } - gcc_assert (is_gimple_reg (nname)); - ver = SSA_NAME_VERSION (name); - nver = SSA_NAME_VERSION (nname); - - /* First copy the values from LOOP to NLOOP. */ - tem.uid = ver; - elt = htab_find_with_hash (steps, &tem, ver); - tem.uid = nver; - slot = htab_find_slot_with_hash (steps, &tem, nver, INSERT); - nelt = XNEW (struct int_tree_map); - nelt->uid = nver; - nelt->to = unshare_expr (elt->to); - *slot = nelt; - - /* Then rewrite the old ones. */ - elt->to = remap_ssa_names (elt->to, name_copies); - } - gcc_assert (nphi == NULL_TREE); -} - -/* Change the exit condition of LOOP so that it exits after PER_THREAD - iterations, and remove the old exit. */ - -static void -change_exit_condition (struct loop *loop, tree per_thread) -{ - basic_block ex_bb = loop->single_exit->src; - basic_block ex_to = loop->single_exit->dest; - block_stmt_iterator bsi = bsi_last (ex_bb); - edge in_loop, e; - tree exit_stmt = bsi_stmt (bsi); - tree type = TREE_TYPE (per_thread); - tree iv, cond; - - gcc_assert (TREE_CODE (exit_stmt) == COND_EXPR); - bsi_remove (&bsi, true); - - in_loop = EDGE_SUCC (ex_bb, 0); - if (in_loop == loop->single_exit) - in_loop = EDGE_SUCC (ex_bb, 1); - - remove_edge (loop->single_exit); - in_loop->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); - in_loop->flags |= EDGE_FALLTHRU; - - e = single_pred_edge (loop_split_edge_with (loop_latch_edge (loop), NULL)); - bsi = bsi_last (e->src); - create_iv (build_int_cst (type, 0), build_int_cst (type, 1), NULL_TREE, - loop, &bsi, true, NULL, &iv); - cond = build3 (COND_EXPR, void_type_node, - fold_build2 (LT_EXPR, boolean_type_node, iv, per_thread), - build1 (GOTO_EXPR, void_type_node, - tree_block_label (e->dest)), - build1 (GOTO_EXPR, void_type_node, - tree_block_label (ex_to))); - bsi_insert_after (&bsi, cond, BSI_NEW_STMT); - - e->flags &= ~EDGE_FALLTHRU; - e->flags |= EDGE_TRUE_VALUE; - loop->single_exit = make_edge (e->src, ex_to, EDGE_FALSE_VALUE); + htab_delete (name_copies); } /* Bitmap containing uids of functions created by parallelization. We cannot @@ -910,182 +683,285 @@ create_loop_fn (void) return decl; } -/* Extracts LOOP and its preheader to a separate function. This function is - returned in LOOP_FN. ARG_STRUCT is initialized in the new function from - an argument of the function. The single basic block that replaces LOOP is - returned. */ +/* Bases all the induction variables in LOOP on a single induction variable + (unsigned with base 0 and step 1), whose final value is compared with + NIT. The induction variable is incremented in the loop latch. */ -static basic_block -extract_loop_to_function (struct loop *loop, tree arg_struct, tree *loop_fn) +static void +canonicalize_loop_ivs (struct loop *loop, tree nit) { - basic_block bb_to = loop_split_edge_with (loop->single_exit, NULL); - basic_block bb_from = loop_preheader_edge (loop)->src; - basic_block repl_bb; - tree arg, narg, stmt; - struct function *act_cfun = cfun; - tree act_decl = current_function_decl; + unsigned precision = TYPE_PRECISION (TREE_TYPE (nit)); + tree phi, prev, res, type, var_before, val, atype, t, next; block_stmt_iterator bsi; - basic_block *body = get_loop_body (loop); - struct loop *outer = loop->outer; - unsigned i, n = loop->num_nodes; + bool ok; + affine_iv iv; + edge exit = single_dom_exit (loop); - cancel_loop_tree (current_loops, loop); - for (i = 0; i < n; i++) - remove_bb_from_loops (body[i]); - free (body); - remove_bb_from_loops (bb_from); - remove_bb_from_loops (bb_to); + for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) + { + res = PHI_RESULT (phi); - bsi = bsi_last (bb_to); - bsi_insert_after (&bsi, build1 (RETURN_EXPR, void_type_node, NULL), - BSI_NEW_STMT); + if (is_gimple_reg (res) + && TYPE_PRECISION (TREE_TYPE (res)) > precision) + precision = TYPE_PRECISION (TREE_TYPE (res)); + } - *loop_fn = create_loop_fn (); - repl_bb = move_sese_region_to_fn (DECL_STRUCT_FUNCTION (*loop_fn), - bb_from, bb_to); - add_bb_to_loop (repl_bb, outer); + type = lang_hooks.types.type_for_size (precision, 1); - cfun = DECL_STRUCT_FUNCTION (*loop_fn); - current_function_decl = *loop_fn; + bsi = bsi_last (loop->latch); + create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE, + loop, &bsi, true, &var_before, NULL); - /* Initialize the arg_struct. */ - if (arg_struct) + bsi = bsi_after_labels (loop->header); + prev = NULL; + for (phi = phi_nodes (loop->header); phi; phi = next) { - arg = DECL_ARGUMENTS (*loop_fn); - add_referenced_var (arg); - narg = make_ssa_name (arg, build_empty_stmt ()); - set_default_def (arg, narg); - - bsi = bsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); - stmt = build2 (MODIFY_EXPR, void_type_node, arg_struct, - fold_convert (TREE_TYPE (arg_struct), narg)); - SSA_NAME_DEF_STMT (arg_struct) = stmt; - bsi_insert_before (&bsi, stmt, BSI_NEW_STMT); + next = PHI_CHAIN (phi); + res = PHI_RESULT (phi); + + if (!is_gimple_reg (res) + || res == var_before) + { + prev = phi; + continue; + } + + ok = simple_iv (loop, phi, res, &iv, true); + gcc_assert (ok); + + SET_PHI_RESULT (phi, NULL_TREE); + remove_phi_node (phi, prev); + + atype = TREE_TYPE (res); + val = fold_build2 (PLUS_EXPR, atype, + unshare_expr (iv.base), + fold_build2 (MULT_EXPR, atype, + unshare_expr (iv.step), + fold_convert (atype, var_before))); + val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true, + BSI_SAME_STMT); + t = build2 (MODIFY_EXPR, void_type_node, res, val); + bsi_insert_before (&bsi, t, BSI_SAME_STMT); + SSA_NAME_DEF_STMT (res) = t; } - cfun = act_cfun; - current_function_decl = act_decl; - go_out_of_ssa (*loop_fn); + t = last_stmt (exit->src); + /* Make the loop exit if the control condition is not satisfied. */ + if (exit->flags & EDGE_TRUE_VALUE) + { + edge te, fe; - return repl_bb; + extract_true_false_edges_from_block (exit->src, &te, &fe); + te->flags = EDGE_FALSE_VALUE; + fe->flags = EDGE_TRUE_VALUE; + } + COND_EXPR_COND (t) = build2 (LT_EXPR, boolean_type_node, var_before, nit); } -/* Emits the code to call LOOP_FN with argument ARG in N_THREADS threads - after BSI. */ +/* Moves the exit condition of LOOP to the beginning of its header, and + duplicates the part of the last iteration that gets disabled to the + exit of the loop. NIT is the number of iterations of the loop + (used to initialize the variables in the duplicated part). + + TODO: the common case is that latch of the loop is empty and immediatelly + follows the loop exit. In this case, it would be better not to copy the + body of the loop, but rather increase the number of iterations of the loop + by one. This may need some additional preconditioning in case NIT = ~0. */ static void -call_in_parallel (block_stmt_iterator bsi, tree loop_fn, tree arg, - unsigned n_threads) +transform_to_exit_first_loop (struct loop *loop, tree nit) { - tree start_decl = built_in_decls[BUILT_IN_GOMP_PARALLEL_START]; - tree args, stmts, adata; + basic_block *bbs, *nbbs, ex_bb, orig_header; + unsigned n; + bool ok; + edge exit = single_dom_exit (loop), hpred; + tree phi, nphi, cond, control, control_name, res, t, cond_stmt; + block_stmt_iterator bsi; + + split_block_after_labels (loop->header); + orig_header = single_succ (loop->header); + hpred = single_succ_edge (loop->header); + add_bb_to_loop (orig_header, loop); - args = tree_cons (NULL, build_int_cst (unsigned_type_node, n_threads), - NULL); - if (arg) + cond_stmt = last_stmt (exit->src); + cond = COND_EXPR_COND (cond_stmt); + control = TREE_OPERAND (cond, 0); + gcc_assert (TREE_OPERAND (cond, 1) == nit); + + /* Make sure that we have phi nodes on exit for all loop header phis + (create_parallel_loop requires that). */ + for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) { - adata = build_fold_addr_expr (arg); - mark_call_clobbered (arg, ESCAPE_TO_CALL); - } - else - adata = null_pointer_node; - args = tree_cons (NULL, adata, args); - args = tree_cons (NULL, build_fold_addr_expr (loop_fn), args); - force_gimple_operand (build_function_call_expr (start_decl, args), - &stmts, false, NULL_TREE); - mark_virtual_ops_for_renaming_list (stmts); - bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING); -} + res = PHI_RESULT (phi); + t = make_ssa_name (SSA_NAME_VAR (res), phi); + SET_PHI_RESULT (phi, t); -/* Makes the induction variables in LOOP start at - (N_THREADS - 1) * PER_THREAD-th iteration when the LOOP is entered through - NEW_ENTRY. STEPS describe the steps of induction variables in LOOP. */ + nphi = create_phi_node (res, orig_header); + SSA_NAME_DEF_STMT (res) = nphi; + add_phi_arg (nphi, t, hpred); -static void -shift_ivs_for_remaining_iterations (struct loop *loop, tree per_thread, - unsigned n_threads, edge new_entry, - htab_t steps) -{ - tree phi, name, nphi; - unsigned ver; - struct int_tree_map tem, *elt; - basic_block bb = new_entry->dest; - edge old_entry, entry; - tree stmts, new_init, init, pass, type, delta; - use_operand_p init_p; - - old_entry = EDGE_PRED (bb, 0); - if (old_entry == new_entry) - old_entry = EDGE_PRED (bb, 1); - entry = loop_preheader_edge (loop); + if (res == control) + { + TREE_OPERAND (cond, 0) = t; + update_stmt (cond_stmt); + control = t; + } + } - for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) + bbs = get_loop_body_in_dom_order (loop); + for (n = 0; bbs[n] != exit->src; n++) + continue; + nbbs = XNEWVEC (basic_block, n); + ok = tree_duplicate_sese_tail (single_succ_edge (loop->header), exit, + bbs + 1, n, nbbs); + gcc_assert (ok); + free (bbs); + ex_bb = nbbs[0]; + free (nbbs); + + /* The only gimple reg that should be copied out of the loop is the + control variable. */ + control_name = NULL_TREE; + for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi)) { - name = PHI_RESULT (phi); - if (!is_gimple_reg (name)) + res = PHI_RESULT (phi); + if (!is_gimple_reg (res)) continue; - ver = SSA_NAME_VERSION (name); - tem.uid = ver; - elt = htab_find_with_hash (steps, &tem, ver); + gcc_assert (control_name == NULL_TREE + && SSA_NAME_VAR (res) == SSA_NAME_VAR (control)); + control_name = res; + } + gcc_assert (control_name != NULL_TREE); + phi = SSA_NAME_DEF_STMT (control_name); + SET_PHI_RESULT (phi, NULL_TREE); + remove_phi_node (phi, NULL_TREE); + + /* Initialize the control variable to NIT. */ + bsi = bsi_after_labels (ex_bb); + t = build2 (MODIFY_EXPR, void_type_node, control_name, nit); + bsi_insert_before (&bsi, t, BSI_NEW_STMT); + SSA_NAME_DEF_STMT (control_name) = t; +} - nphi = create_phi_node (SSA_NAME_VAR (name), bb); - pass = PHI_RESULT (nphi); +/* Create the parallel constructs for LOOP as described in gen_parallel_loop. + LOOP_FN and DATA are the arguments of OMP_PARALLEL. + NEW_DATA is the variable that should be initialized from the argument + of LOOP_FN. N_THREADS is the requested number of threads. Returns the + basic block containing OMP_PARALLEL tree. */ - init_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, entry); - init = USE_FROM_PTR (init_p); - SET_USE (init_p, pass); +static basic_block +create_parallel_loop (struct loop *loop, tree loop_fn, tree data, + tree new_data, unsigned n_threads) +{ + block_stmt_iterator bsi; + basic_block bb, paral_bb, for_bb, ex_bb; + tree t, param, res; + tree cvar, cvar_init, initvar, cvar_next, cvar_base, cond, phi, type; + edge exit, nexit, guard, end, e; + + /* Prepare the OMP_PARALLEL statement. */ + bb = loop_preheader_edge (loop)->src; + paral_bb = single_pred (bb); + bsi = bsi_last (paral_bb); + + t = build_omp_clause (OMP_CLAUSE_NUM_THREADS); + OMP_CLAUSE_NUM_THREADS_EXPR (t) + = build_int_cst (integer_type_node, n_threads); + t = build4 (OMP_PARALLEL, void_type_node, NULL_TREE, t, + loop_fn, data); + + bsi_insert_after (&bsi, t, BSI_NEW_STMT); + + /* Initialize NEW_DATA. */ + if (data) + { + bsi = bsi_after_labels (bb); - add_phi_arg (nphi, init, old_entry); - type = TREE_TYPE (name); - delta = fold_build2 (MULT_EXPR, type, - build_int_cst (type, n_threads - 1), - per_thread); - delta = fold_build2 (MULT_EXPR, type, delta, unshare_expr (elt->to)); - new_init = fold_build2 (PLUS_EXPR, type, init, delta); + param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL_TREE); + t = build2 (MODIFY_EXPR, void_type_node, param, build_fold_addr_expr (data)); + bsi_insert_before (&bsi, t, BSI_SAME_STMT); + SSA_NAME_DEF_STMT (param) = t; - new_init = force_gimple_operand (new_init, &stmts, true, NULL_TREE); - if (stmts) - bsi_insert_on_edge_immediate_loop (new_entry, stmts); - add_phi_arg (nphi, new_init, new_entry); + t = build2 (MODIFY_EXPR, void_type_node, new_data, + fold_convert (TREE_TYPE (new_data), param)); + bsi_insert_before (&bsi, t, BSI_SAME_STMT); + SSA_NAME_DEF_STMT (new_data) = t; } -} -/* If PAR is true, emit the code on E to finalize the threads. */ + /* Emit OMP_RETURN for OMP_PARALLEL. */ + bb = split_loop_exit_edge (single_dom_exit (loop)); + bsi = bsi_last (bb); + bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT); + + /* Extract data for OMP_FOR. */ + gcc_assert (loop->header == single_dom_exit (loop)->src); + cond = COND_EXPR_COND (last_stmt (loop->header)); + + cvar = TREE_OPERAND (cond, 0); + cvar_base = SSA_NAME_VAR (cvar); + phi = SSA_NAME_DEF_STMT (cvar); + cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); + initvar = make_ssa_name (cvar_base, NULL_TREE); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)), + initvar); + cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + + bsi = bsi_last (loop->latch); + gcc_assert (bsi_stmt (bsi) == SSA_NAME_DEF_STMT (cvar_next)); + bsi_remove (&bsi, true); -static void -finalize_threads (edge e, tree par) -{ - basic_block cond_bb = loop_split_edge_with (e, NULL); - basic_block fin_bb = loop_split_edge_with (single_succ_edge (cond_bb), NULL); - basic_block cont_bb = single_succ (fin_bb); - block_stmt_iterator bsi = bsi_last (cond_bb); - tree decl = built_in_decls[BUILT_IN_GOMP_PARALLEL_END]; - tree stmt = build_function_call_expr (decl, NULL); - edge te, fe, be; - tree phi; + /* Prepare cfg. */ + for_bb = loop_split_edge_with (loop_preheader_edge (loop), NULL); + ex_bb = split_loop_exit_edge (single_dom_exit (loop)); + extract_true_false_edges_from_block (loop->header, &nexit, &exit); + gcc_assert (exit == single_dom_exit (loop)); - bsi_insert_after (&bsi, - build3 (COND_EXPR, void_type_node, - par, - build1 (GOTO_EXPR, void_type_node, - tree_block_label (fin_bb)), - build1 (GOTO_EXPR, void_type_node, - tree_block_label (cont_bb))), - BSI_NEW_STMT); - te = single_succ_edge (cond_bb); - te->flags = EDGE_TRUE_VALUE; - be = single_succ_edge (fin_bb); - fe = make_edge (cond_bb, cont_bb, EDGE_FALSE_VALUE); - - for (phi = phi_nodes (cont_bb); phi != NULL_TREE; phi = PHI_CHAIN (phi)) - add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (phi, be), fe); - - set_immediate_dominator (CDI_DOMINATORS, cont_bb, - recount_dominator (CDI_DOMINATORS, cont_bb)); - bsi = bsi_last (fin_bb); - mark_virtual_ops_for_renaming (stmt); - bsi_insert_after (&bsi, stmt, BSI_NEW_STMT); + guard = make_edge (for_bb, ex_bb, 0); + single_succ_edge (loop->latch)->flags = 0; + end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU); + for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi)) + { + res = PHI_RESULT (phi); + gcc_assert (!is_gimple_reg (phi)); + t = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)); + add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_preheader_edge (loop)), + guard); + add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_latch_edge (loop)), + end); + } + e = redirect_edge_and_branch (exit, nexit->dest); + PENDING_STMT (e) = NULL; + + /* Emit OMP_FOR. */ + TREE_OPERAND (cond, 0) = cvar_base; + type = TREE_TYPE (cvar); + t = build_omp_clause (OMP_CLAUSE_SCHEDULE); + OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC; + t = build6 (OMP_FOR, void_type_node, NULL_TREE, t, + build2 (MODIFY_EXPR, void_type_node, initvar, cvar_init), + cond, + build2 (MODIFY_EXPR, void_type_node, + cvar_base, + build2 (PLUS_EXPR, type, + cvar_base, + build_int_cst (type, 1))), + NULL_TREE); + bsi = bsi_last (for_bb); + bsi_insert_after (&bsi, t, BSI_NEW_STMT); + SSA_NAME_DEF_STMT (initvar) = t; + + /* Emit OMP_CONTINUE. */ + bsi = bsi_last (loop->latch); + t = build2 (OMP_CONTINUE, void_type_node, cvar_next, cvar); + bsi_insert_after (&bsi, t, BSI_NEW_STMT); + SSA_NAME_DEF_STMT (cvar_next) = t; + + /* Emit OMP_RETURN for OMP_FOR. */ + bsi = bsi_last (ex_bb); + bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT); + + return paral_bb; } /* Generates code to execute the iterations of LOOP in N_THREADS threads in @@ -1096,11 +972,9 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads, struct tree_niter_desc *niter) { struct loop *nloop; - tree many_iterations_cond, type, per_thread, new_per_thread; - tree stmts, arg_struct, new_arg_struct, loop_fn, par_flag, phi; - basic_block repl_bb, npre; - htab_t steps, name_copies; - edge orig_entry; + tree many_iterations_cond, type, nit; + tree stmts, arg_struct, new_arg_struct; + basic_block parallel_head; /* From @@ -1119,20 +993,24 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads, we generate the following code: --------------------------------------------------------------------- - parallelized = false; if (MAY_BE_ZERO || NITER < MIN_PER_THREAD * N_THREADS) - goto rest; - - per_thread = NITER / N_THREADS; - store all local loop-invariant variables (including per_thread in case it - is not a constant) used in body of the loop to DATA. - __builtin_GOMP_parallel_start (loop_fn, &DATA, N_THREADS); - INIT += STEP * per_thread * (N_THREADS - 1); - parallelized = true; - - rest: + goto original; + + BODY1; + store all local loop-invariant variables used in body of the loop to DATA. + OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA); + load the variables from DATA. + OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static)) + BODY2; + BODY1; + OMP_CONTINUE; + OMP_RETURN -- OMP_FOR + OMP_RETURN -- OMP_PARALLEL + goto end; + + original: loop { IV = phi (INIT, IV + STEP) @@ -1142,53 +1020,31 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads, BODY2; } - if (parallelized) - __builtin_GOMP_parallel_end (); - --------------------------------------------------------------------- - - With the function + end: - --------------------------------------------------------------------- - void loop_fn (void *data) - { - load all local loop-invariant variables used in body of the loop - from data - INIT += STEP * per_thread * (thread_id - 1); - - loop - { - IV = phi (INIT, IV + STEP) - NEW_CTR = phi (0, NEW_CTR'); - BODY1; - BODY2; - NEW_CTR' = NEW_CTR + 1; - if (NEW_CTR' == per_thread) - return; - } - } - --------------------------------------------------------------------- */ - /* Record the steps of the induction variables. */ - record_steps (loop, &steps); - /* Create two versions of the loop -- in the old one, we know that the number of iterations is large enough, and we will transform it into the loop that will be split to loop_fn, the new one will be used for the remaining iterations. */ type = TREE_TYPE (niter->niter); + nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true, + NULL_TREE); + if (stmts) + bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + many_iterations_cond = fold_build2 (GE_EXPR, boolean_type_node, - niter->niter, - build_int_cst (type, MIN_PER_THREAD * n_threads)); + nit, build_int_cst (type, MIN_PER_THREAD * n_threads)); many_iterations_cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, - invert_truthvalue (niter->may_be_zero), + invert_truthvalue (unshare_expr (niter->may_be_zero)), many_iterations_cond); many_iterations_cond - = force_gimple_operand (unshare_expr (many_iterations_cond), - &stmts, false, NULL_TREE); + = force_gimple_operand (many_iterations_cond, &stmts, + false, NULL_TREE); if (stmts) bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); if (!is_gimple_condexpr (many_iterations_cond)) @@ -1205,74 +1061,34 @@ gen_parallel_loop (struct loop *loop, unsigned n_threads, update_ssa (TODO_update_ssa); free_original_copy_tables (); - /* Compute number of iterations per thread. */ - per_thread = fold_build2 (FLOOR_DIV_EXPR, type, niter->niter, - build_int_cst (type, n_threads)); - per_thread = force_gimple_operand (unshare_expr (per_thread), &stmts, true, - NULL_TREE); - if (stmts) - bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + /* Base all the induction variables in LOOP on a single control one. */ + canonicalize_loop_ivs (loop, nit); + + /* Ensure that the exit condition is the first statement in the loop. */ + transform_to_exit_first_loop (loop, nit); /* Eliminate the references to local variables from the loop. */ eliminate_local_variables (loop); /* In the old loop, move all variables non-local to the loop to a structure and back, and create separate decls for the variables used in loop. */ - new_per_thread = per_thread; - separate_decls_in_loop (loop, &new_per_thread, &arg_struct, &new_arg_struct, - &name_copies); - - /* Record the steps of induction variables in the copy, and remap those - in the original loop. */ - record_and_update_steps (loop, nloop, steps, name_copies); - - /* Update the initial values of induction variables. */ - shift_ivs_for_iteration (loop, new_per_thread, steps); + separate_decls_in_loop (loop, &arg_struct, &new_arg_struct); - /* Add the new exit condition. */ - change_exit_condition (loop, new_per_thread); + /* Create the parallel constructs. */ + parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct, + new_arg_struct, n_threads); - /* Split the loop to the new function. */ - repl_bb = extract_loop_to_function (loop, new_arg_struct, &loop_fn); - cgraph_add_new_function (loop_fn); - - /* Call the builtin to run the function in several threads. */ - call_in_parallel (bsi_last (repl_bb), loop_fn, arg_struct, n_threads); - - /* Redirect the exit edge to the versioned copy, and set the parallelized - flag. */ - par_flag = create_tmp_var (boolean_type_node, "parallelized"); - add_referenced_var (par_flag); - - npre = loop_preheader_edge (nloop)->src; - gcc_assert (phi_nodes (npre) == NULL); - - orig_entry = single_pred_edge (npre); - redirect_edge_and_branch (single_succ_edge (repl_bb), - loop_preheader_edge (nloop)->src); - phi = create_phi_node (par_flag, npre); - add_phi_arg (phi, boolean_false_node, orig_entry); - add_phi_arg (phi, boolean_true_node, single_succ_edge (repl_bb)); - par_flag = PHI_RESULT (phi); - - /* Update the initial values of induction variables in the versioned - copy. */ - shift_ivs_for_remaining_iterations (nloop, per_thread, n_threads, - single_succ_edge (repl_bb), steps); - - /* Finalize the threads after the loop if necessary. */ - finalize_threads (nloop->single_exit, par_flag); - - htab_delete (name_copies); - htab_delete (steps); scev_reset (); - /* We created a new call clobbered variable. This means every call in the - function gets a new virtual operand. However, we cannot rerun alias - analysis after vectorizer (or at least, passes.c claims so), thus we - must mark them for renaming manually. */ - mark_call_virtual_operands (); - update_ssa (TODO_update_ssa_only_virtuals); + /* Cancel the loop (it is simpler to do it here rather than to teach the + expander to do it). */ + cancel_loop_tree (current_loops, loop); + + /* Expand the parallel constructs. We do it directly here instead of running + a separate expand_omp pass, since it is more efficient, and less likely to + cause troubles with further analyses not being able to deal with the + OMP trees. */ + omp_expand_local (parallel_head); } /* Detect parallel loops and generate parallel code using libgomp diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index 0b29c5ee8b4..f20be11630e 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -439,9 +439,9 @@ verify_loop_closed_ssa (void) } /* Split loop exit edge EXIT. The things are a bit complicated by a need to - preserve the loop closed ssa form. */ + preserve the loop closed ssa form. The new block is returned. */ -void +basic_block split_loop_exit_edge (edge exit) { basic_block dest = exit->dest; @@ -468,6 +468,8 @@ split_loop_exit_edge (edge exit) add_phi_arg (new_phi, name, exit); SET_USE (op_p, new_name); } + + return bb; } /* Insert statement STMT to the edge E and update the loop structures. diff --git a/gcc/tree.c b/gcc/tree.c index cfbfe1462b6..4b5103b9cda 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -3095,6 +3095,35 @@ build5_stat (enum tree_code code, tree tt, tree arg0, tree arg1, } tree +build6_stat (enum tree_code code, tree tt, tree arg0, tree arg1, + tree arg2, tree arg3, tree arg4, tree arg5 MEM_STAT_DECL) +{ + bool constant, read_only, side_effects, invariant; + tree t; + + gcc_assert (TREE_CODE_LENGTH (code) == 6); + + t = make_node_stat (code PASS_MEM_STAT); + TREE_TYPE (t) = tt; + + side_effects = TREE_SIDE_EFFECTS (t); + + PROCESS_ARG(0); + PROCESS_ARG(1); + PROCESS_ARG(2); + PROCESS_ARG(3); + PROCESS_ARG(4); + PROCESS_ARG(5); + + TREE_SIDE_EFFECTS (t) = side_effects; + TREE_THIS_VOLATILE (t) + = (TREE_CODE_CLASS (code) == tcc_reference + && arg0 && TREE_THIS_VOLATILE (arg0)); + + return t; +} + +tree build7_stat (enum tree_code code, tree tt, tree arg0, tree arg1, tree arg2, tree arg3, tree arg4, tree arg5, tree arg6 MEM_STAT_DECL) diff --git a/gcc/tree.h b/gcc/tree.h index 9da85d74292..13f0decb91f 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3532,6 +3532,9 @@ extern tree build4_stat (enum tree_code, tree, tree, tree, tree, extern tree build5_stat (enum tree_code, tree, tree, tree, tree, tree, tree MEM_STAT_DECL); #define build5(c,t1,t2,t3,t4,t5,t6) build5_stat (c,t1,t2,t3,t4,t5,t6 MEM_STAT_INFO) +extern tree build6_stat (enum tree_code, tree, tree, tree, tree, tree, tree, + tree MEM_STAT_DECL); +#define build6(c,t1,t2,t3,t4,t5,t6,t7) build6_stat (c,t1,t2,t3,t4,t5,t6, t7 MEM_STAT_INFO) extern tree build7_stat (enum tree_code, tree, tree, tree, tree, tree, tree, tree, tree MEM_STAT_DECL); #define build7(c,t1,t2,t3,t4,t5,t6,t7,t8) \ |