diff options
Diffstat (limited to 'gcc/tree-parloops.c')
-rw-r--r-- | gcc/tree-parloops.c | 38 |
1 files changed, 29 insertions, 9 deletions
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 4e9b102973a..8d4b3a5524c 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -1156,7 +1156,7 @@ create_loads_and_stores_for_name (void **slot, void *data) static void separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, tree *arg_struct, tree *new_arg_struct, - struct clsn_data *ld_st_data) + struct clsn_data *ld_st_data, unsigned new_target) { basic_block bb1 = split_edge (entry); @@ -1220,7 +1220,10 @@ separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, /* Create the loads and stores. */ *arg_struct = create_tmp_var (type, ".paral_data_store"); add_referenced_var (*arg_struct); - nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load"); + nvar = create_tmp_var (build_pointer_type_for_mode + (type, *targetm_array[new_target]->ptr_mode, + false), + ".paral_data_load"); add_referenced_var (nvar); *new_arg_struct = make_ssa_name (nvar, NULL); @@ -1270,7 +1273,7 @@ parallelized_function_p (tree fn) a parallelized loop. */ static tree -create_loop_fn (void) +create_loop_fn (unsigned int target_arch) { char buf[100]; char *tname; @@ -1312,6 +1315,15 @@ create_loop_fn (void) TREE_USED (t) = 1; DECL_ARGUMENTS (decl) = t; + if (target_arch) + { + const char *target_name = targetm_array[target_arch]->name; + + tree value = build_string (strlen (target_name), target_name); + decl_attributes (&decl, build_tree_list (get_identifier ("target_arch"), + build_tree_list (NULL, value)), + 0); + } allocate_struct_function (decl, false); /* The call to allocate_struct_function clobbers CFUN, so we need to restore @@ -1791,11 +1803,15 @@ gen_parallel_loop (struct loop *loop, htab_t reduction_list, /* In the old loop, move all variables non-local to the loop to a structure and back, and create separate decls for the variables used in loop. */ separate_decls_in_region (entry, exit, reduction_list, &arg_struct, - &new_arg_struct, &clsn_data); + &new_arg_struct, &clsn_data, loop->target_arch); /* Create the parallel constructs. */ - parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct, - new_arg_struct, n_threads); + parallel_head + = create_parallel_loop (loop, create_loop_fn (loop->target_arch), + arg_struct, new_arg_struct, n_threads); + /* ??? for loop->target_arch != cfun->target_arch, should create another + function so that a small slice of the loop can be run on the main + processor. */ if (htab_elements (reduction_list) > 0) create_call_for_reduction (loop, reduction_list, &clsn_data); @@ -1871,9 +1887,13 @@ parallelize_loops (void) /* And of course, the loop must be parallelizable. */ || !can_duplicate_loop_p (loop) || loop_has_blocks_with_irreducible_flag (loop) - /* FIXME: the check for vector phi nodes could be removed. */ - || loop_has_vector_phi_nodes (loop) - || !loop_parallel_p (loop, reduction_list, &niter_desc)) + || (loop->target_arch != cfun->target_arch + ? !number_of_iterations_exit (loop, single_dom_exit (loop), + &niter_desc, false) + /* FIXME: the check for vector phi nodes could be removed. */ + : (loop_has_vector_phi_nodes (loop) + || flag_tree_parallelize_loops <= 1 + || !loop_parallel_p (loop, reduction_list, &niter_desc)))) continue; changed = true; |