aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pop <sebastian.pop@amd.com>2008-01-27 18:19:34 +0000
committerSebastian Pop <sebastian.pop@amd.com>2008-01-27 18:19:34 +0000
commit10a5ee300551b390c959db457e6cb2d1c2f384d4 (patch)
tree3638121622d1ef7643a63e8365ba44a688612103
parentc1322efc2c1a7778c357dc4c6399c72daca9c4f9 (diff)
2008-01-27 Sebastian Pop <sebastian.pop@amd.com>
* tree-loop-distribution.c (generate_memset_zero, generate_builtin, generate_code_for_partition, rdg_flag_all_uses): New. (rdg_flag_uses): Gather in the same partition the statements defining the VUSES of the current statement. (rdg_flag_similar_stores): Renamed rdg_flag_similar_memory_accesses. Gather in the same partition not only the stores to the same memory access, but also the reads. (ldist_generate_loops): Renamed ldist_gen. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/graphite@131886 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.graphite11
-rw-r--r--gcc/tree-loop-distribution.c343
2 files changed, 309 insertions, 45 deletions
diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 6e27db0bb7b..59bac188cbf 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,3 +1,14 @@
+2008-01-27 Sebastian Pop <sebastian.pop@amd.com>
+
+ * tree-loop-distribution.c (generate_memset_zero,
+ generate_builtin, generate_code_for_partition, rdg_flag_all_uses): New.
+ (rdg_flag_uses): Gather in the same partition the statements defining
+ the VUSES of the current statement.
+ (rdg_flag_similar_stores): Renamed rdg_flag_similar_memory_accesses.
+ Gather in the same partition not only the stores to the same memory
+ access, but also the reads.
+ (ldist_generate_loops): Renamed ldist_gen.
+
2008-01-24 Sebastian Pop <sebastian.pop@amd.com>
Tobias Grosser <grosser@fmi.uni-passau.de>
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 488c6fc6006..41bb6682cd6 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1,24 +1,23 @@
-/* Loop Distribution
- Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+/* Loop distribution.
+ Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
and Sebastian Pop <sebastian.pop@amd.com>.
This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
-
+
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
/* This pass performs loop distribution: for example, the loop
@@ -171,7 +170,7 @@ create_bb_after_loop (struct loop *loop)
static bool
generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
- unsigned i, x = 0;
+ unsigned i, x;
block_stmt_iterator bsi;
basic_block *bbs;
@@ -190,7 +189,7 @@ generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
stmts_from_loop. */
bbs = get_loop_body_in_dom_order (loop);
- for (i = 0; i < loop->num_nodes; i++)
+ for (x = 0, i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
tree phi, prev = NULL_TREE, next;
@@ -222,6 +221,197 @@ generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
return true;
}
+/* Generate a call to memset. Return true when the operation succeeded. */
+
+static bool
+generate_memset_zero (tree stmt, tree op0, tree nb_iter,
+ block_stmt_iterator bsi)
+{
+ tree s, t, stmts, nb_bytes, addr_base;
+ bool res = false;
+ tree stmt_list = NULL_TREE;
+ tree args [3];
+ tree fn_call, mem, fndecl, fntype, fn;
+ tree_stmt_iterator i;
+ ssa_op_iter iter;
+ struct data_reference *dr = XCNEW (struct data_reference);
+
+ nb_bytes = fold_build2 (MULT_EXPR, TREE_TYPE (nb_iter),
+ nb_iter, TYPE_SIZE_UNIT (TREE_TYPE (op0)));
+ nb_bytes = force_gimple_operand (nb_bytes, &stmts, true, NULL);
+ append_to_statement_list_force (stmts, &stmt_list);
+
+ DR_STMT (dr) = stmt;
+ DR_REF (dr) = op0;
+ dr_analyze_innermost (dr);
+
+ /* Test for a positive stride, iterating over every element. */
+ if (integer_zerop (fold_build2 (MINUS_EXPR, integer_type_node, DR_STEP (dr),
+ TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
+ addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (DR_BASE_ADDRESS (dr)),
+ DR_BASE_ADDRESS (dr),
+ size_binop (PLUS_EXPR,
+ DR_OFFSET (dr), DR_INIT (dr)));
+
+ /* Test for a negative stride, iterating over every element. */
+ else if (integer_zerop (fold_build2 (PLUS_EXPR, integer_type_node,
+ TYPE_SIZE_UNIT (TREE_TYPE (op0)),
+ DR_STEP (dr))))
+ {
+ addr_base = size_binop (PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
+ addr_base = fold_build2 (MINUS_EXPR, sizetype, addr_base, nb_bytes);
+ addr_base = force_gimple_operand (addr_base, &stmts, true, NULL);
+ append_to_statement_list_force (stmts, &stmt_list);
+
+ addr_base = fold_build2 (POINTER_PLUS_EXPR,
+ TREE_TYPE (DR_BASE_ADDRESS (dr)),
+ DR_BASE_ADDRESS (dr), addr_base);
+ }
+ else
+ goto end;
+
+ mem = force_gimple_operand (addr_base, &stmts, true, NULL);
+ append_to_statement_list_force (stmts, &stmt_list);
+
+
+ fndecl = implicit_built_in_decls [BUILT_IN_MEMSET];
+ fntype = TREE_TYPE (fndecl);
+ fn = build1 (ADDR_EXPR, build_pointer_type (fntype), fndecl);
+
+ args[0] = mem;
+ args[1] = integer_zero_node;
+ args[2] = nb_bytes;
+
+ fn_call = build_call_array (fntype, fn, 3, args);
+ append_to_statement_list_force (fn_call, &stmt_list);
+
+ for (i = tsi_start (stmt_list); !tsi_end_p (i); tsi_next (&i))
+ {
+ s = tsi_stmt (i);
+ update_stmt_if_modified (s);
+
+ FOR_EACH_SSA_TREE_OPERAND (t, s, iter, SSA_OP_VIRTUAL_DEFS)
+ {
+ if (TREE_CODE (t) == SSA_NAME)
+ t = SSA_NAME_VAR (t);
+ mark_sym_for_renaming (t);
+ }
+ }
+
+ /* Mark also the uses of the VDEFS of STMT to be renamed. */
+ FOR_EACH_SSA_TREE_OPERAND (t, stmt, iter, SSA_OP_VIRTUAL_DEFS)
+ {
+ if (TREE_CODE (t) == SSA_NAME)
+ {
+ imm_use_iterator imm_iter;
+
+ FOR_EACH_IMM_USE_STMT (s, imm_iter, t)
+ update_stmt (s);
+
+ t = SSA_NAME_VAR (t);
+ }
+ mark_sym_for_renaming (t);
+ }
+
+ bsi_insert_after (&bsi, stmt_list, BSI_CONTINUE_LINKING);
+ res = true;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "generated memset zero\n");
+
+ if (0)
+ fprintf (stdout, "generated memset zero\n");
+
+ end:
+ free_data_ref (dr);
+ return res;
+}
+
+/* Tries to generate a builtin function for the instructions of LOOP
+ pointed to by the bits set in PARTITION. Returns true when the
+ operation succeeded. */
+
+static bool
+generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
+{
+ bool res = false;
+ unsigned i, x = 0;
+ basic_block *bbs;
+ tree write = NULL_TREE;
+ tree op0, op1;
+ block_stmt_iterator bsi;
+ tree nb_iter = number_of_exit_cond_executions (loop);
+
+ if (!nb_iter || nb_iter == chrec_dont_know)
+ return false;
+
+ bbs = get_loop_body_in_dom_order (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+ tree phi;
+
+ for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ x++;
+
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+
+ if (bitmap_bit_p (partition, x++)
+ && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
+ && !is_gimple_reg (GIMPLE_STMT_OPERAND (stmt, 0)))
+ {
+ /* Don't generate the builtins when there are more than
+ one memory write. */
+ if (write != NULL)
+ goto end;
+
+ write = stmt;
+ }
+ }
+ }
+
+ if (!write)
+ goto end;
+
+ op0 = GIMPLE_STMT_OPERAND (write, 0);
+ op1 = GIMPLE_STMT_OPERAND (write, 1);
+
+ if (!(TREE_CODE (op0) == ARRAY_REF
+ || TREE_CODE (op0) == INDIRECT_REF))
+ goto end;
+
+ /* The new statements will be placed before LOOP. */
+ bsi = bsi_last (loop_preheader_edge (loop)->src);
+
+ if (integer_zerop (op1) || real_zerop (op1))
+ res = generate_memset_zero (write, op0, nb_iter, bsi);
+
+ /* If this is the last partition for which we generate code, we have
+ to destroy the loop. */
+ if (res && !copy_p)
+ cancel_loop_tree (loop);
+
+ end:
+ free (bbs);
+ return res;
+}
+
+/* Generates code for PARTITION. For simple loops, this function can
+ generate a built-in. */
+
+static bool
+generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
+{
+ if (generate_builtin (loop, partition, copy_p))
+ return true;
+
+ return generate_loops_for_partition (loop, partition, copy_p);
+}
+
+
/* Returns true if the node V of RDG cannot be recomputed. */
static bool
@@ -335,9 +525,30 @@ static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
/* Flag all the uses of U. */
static void
+rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
+ bitmap processed, bool *part_has_writes)
+{
+ struct graph_edge *e;
+
+ for (e = rdg->vertices[u].succ; e; e = e->succ_next)
+ if (!bitmap_bit_p (processed, e->dest))
+ {
+ rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops,
+ processed, part_has_writes);
+ rdg_flag_all_uses (rdg, e->dest, partition, loops, processed,
+ part_has_writes);
+ }
+}
+
+/* Flag the uses of U stopping following the information from
+ upstream_mem_writes. */
+
+static void
rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
bitmap processed, bool *part_has_writes)
{
+ ssa_op_iter iter;
+ use_operand_p use_p;
struct vertex *x = &(rdg->vertices[u]);
tree stmt = RDGV_STMT (x);
struct graph_edge *anti_dep = has_anti_dependence (x);
@@ -354,6 +565,24 @@ rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
processed, part_has_writes);
}
+ if (TREE_CODE (stmt) != PHI_NODE)
+ {
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VIRTUAL_USES)
+ {
+ tree use = USE_FROM_PTR (use_p);
+
+ if (TREE_CODE (use) == SSA_NAME)
+ {
+ tree def_stmt = SSA_NAME_DEF_STMT (use);
+ int v = rdg_vertex_for_stmt (rdg, def_stmt);
+
+ if (v >= 0)
+ rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
+ processed, part_has_writes);
+ }
+ }
+ }
+
if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
&& has_upstream_mem_writes (u))
{
@@ -494,32 +723,58 @@ typedef struct rdg_component
DEF_VEC_P (rdgc);
DEF_VEC_ALLOC_P (rdgc, heap);
-/* Remove from OTHER_STORES those vertices that are storing to the
- same arrays as the stores from C. */
+/* Flag all the nodes of RDG containing memory accesses that could
+ potentially belong arrays already accessed in the current
+ PARTITION. */
static void
-rdg_flag_similar_stores (struct graph *rdg, rdgc c, bitmap partition, bitmap loops,
- bitmap processed, VEC (int, heap) **other_stores)
+rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
+ bitmap loops, bitmap processed,
+ VEC (int, heap) **other_stores)
{
- int i, j, x, v;
-
- if (VEC_length (int, *other_stores) == 0)
- return;
+ bool foo;
+ unsigned i, n;
+ int j, k, kk;
+ bitmap_iterator ii;
+ struct graph_edge *e;
- for (i = 0; VEC_iterate (int, c->vertices, i, v); i++)
- if (RDG_MEM_WRITE_STMT (rdg, v))
- for (j = 0; VEC_iterate (int, *other_stores, j, x); )
- {
- if (rdg_has_similar_memory_accesses (rdg, v, x))
+ EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
+ if (RDG_MEM_WRITE_STMT (rdg, i)
+ || RDG_MEM_READS_STMT (rdg, i))
+ {
+ for (j = 0; j < rdg->n_vertices; j++)
+ if (!bitmap_bit_p (processed, j)
+ && (RDG_MEM_WRITE_STMT (rdg, j)
+ || RDG_MEM_READS_STMT (rdg, j))
+ && rdg_has_similar_memory_accesses (rdg, i, j))
{
- bool foo;
- rdg_flag_vertex_and_dependent (rdg, x, partition, loops,
+ /* Flag first the node J itself, and all the nodes that
+ are needed to compute J. */
+ rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
processed, &foo);
- VEC_unordered_remove (int, *other_stores, j);
+
+ /* When J is a read, we want to coalesce in the same
+ PARTITION all the nodes that are using J: this is
+ needed for better cache locality. */
+ rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);
+
+ /* Remove from OTHER_STORES the vertex that we flagged. */
+ if (RDG_MEM_WRITE_STMT (rdg, j))
+ for (k = 0; VEC_iterate (int, *other_stores, k, kk); k++)
+ if (kk == j)
+ {
+ VEC_unordered_remove (int, *other_stores, k);
+ break;
+ }
}
- else
- j++;
- }
+
+ /* If the node I has two uses, then keep these together in the
+ same PARTITION. */
+ for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);
+
+ if (n > 1)
+ rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
+ }
}
/* Returns a bitmap in which all the statements needed for computing
@@ -545,9 +800,8 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
and determine those vertices that have some memory affinity with
the current nodes in the component: these are stores to the same
arrays, i.e. we're taking care of cache locality. */
- if (part_has_writes)
- rdg_flag_similar_stores (rdg, c, partition, loops, processed,
- other_stores);
+ rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
+ other_stores);
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
@@ -707,13 +961,12 @@ debug_rdg_partitions (VEC (bitmap, heap) *partitions)
dump_rdg_partitions (stderr, partitions);
}
-/* Split the statements contained in LOOP in several loops following
- the STARTING_VERTICES from RDG. Returns the number of distributed
- loops. */
+/* Generate code from STARTING_VERTICES in RDG. Returns the number of
+ distributed loops. */
static int
-ldist_generate_loops (struct loop *loop, struct graph *rdg,
- VEC (int, heap) *starting_vertices)
+ldist_gen (struct loop *loop, struct graph *rdg,
+ VEC (int, heap) *starting_vertices)
{
int i, nbp;
VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
@@ -762,7 +1015,7 @@ ldist_generate_loops (struct loop *loop, struct graph *rdg,
dump_rdg_partitions (dump_file, partitions);
for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++)
- if (!generate_loops_for_partition (loop, partition, i < nbp - 1))
+ if (!generate_code_for_partition (loop, partition, i < nbp - 1))
goto ldist_done;
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
@@ -838,7 +1091,7 @@ distribute_loop (struct loop *loop, VEC (tree, heap) *stmts)
}
}
- res = ldist_generate_loops (loop, rdg, vertices);
+ res = ldist_gen (loop, rdg, vertices);
VEC_free (int, heap, vertices);
free_rdg (rdg);