path: root/gcc/tree-ssa-pre.c
author     Joern Rennecke <joern.rennecke@st.com>   2009-03-05 17:40:15 +0000
committer  Joern Rennecke <joern.rennecke@st.com>   2009-03-05 17:40:15 +0000
commit     6000d164b04d1ff76b01aa08a13c6022a5b929eb
tree       337865b53e42eced79d3723e441de1ebd665309f /gcc/tree-ssa-pre.c
parent     3eaed088966a6e06d925191184da47647237756d

Check ARC patches into arc-20081210-branch.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-20081210-branch@144652 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-ssa-pre.c')
 -rw-r--r--  gcc/tree-ssa-pre.c | 127
 1 file changed, 107 insertions(+), 20 deletions(-)
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 7773fe96b6f..f58ee94af62 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -104,6 +104,10 @@ along with GCC; see the file COPYING3. If not see
In order to make it fully redundant, we insert the expression into
the predecessors where it is not available, but is ANTIC.
+   When optimizing for size, we only eliminate the partial redundancy
+   if we need to insert in only one predecessor.  This almost
+   completely avoids the code size increase that PRE usually causes.
+
For the partial anticipation case, we only perform insertion if it
is partially anticipated in some block, and fully available in all
of the predecessors.
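
As an editorial aside (not part of the patch), the transformation being
constrained here can be pictured with a small invented C example:

/* Before PRE: a + b is computed on the path through the 'if' arm and
   again at the join, but not on the 'else' path, so the second
   computation is only partially redundant.  */
int before_pre (int a, int b, int p)
{
  int x;
  if (p)
    x = a + b;
  else
    x = 0;
  return x + (a + b);   /* partially redundant */
}

/* After PRE: one insertion in the predecessor that lacked the value
   makes the expression fully redundant at the join.  This is exactly
   the single-insertion case that stays profitable even at -Os.  */
int after_pre (int a, int b, int p)
{
  int t, x;
  if (p)
    {
      t = a + b;
      x = t;
    }
  else
    {
      t = a + b;        /* the one inserted computation */
      x = 0;
    }
  return x + t;         /* redundancy eliminated */
}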
@@ -426,6 +430,7 @@ static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int, gimple);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
+static void bitmap_set_and (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_insert_into_set_1 (bitmap_set_t, pre_expr, bool);
@@ -2978,13 +2983,6 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum,
tree temp, res;
gimple phi;
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Found partial redundancy for expression ");
- print_pre_expr (dump_file, expr);
- fprintf (dump_file, " (%04d)\n", val);
- }
-
/* Make sure we aren't creating an induction variable. */
if (block->loop_depth > 0 && EDGE_COUNT (block->preds) == 2
&& expr->kind != REFERENCE)
@@ -3183,6 +3181,47 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum,
}
+/* Return true if, when optimizing for speed, it is appropriate to make
+   INSERTS_NEEDED insertions in order to make EXPR in BLOCK redundant. */
+static bool
+ppre_n_insert_for_speed_p (pre_expr expr, basic_block block,
+ unsigned int inserts_needed)
+{
+ /* The more expensive EXPR is, the more we should be prepared to insert
+ in the predecessors of BLOCK to make EXPR fully redundant.
+ For now, only recognize AND, OR, XOR, PLUS and MINUS of a multiple-use
+ SSA_NAME with a constant as cheap. */
+ int cost;
+
+ if (flag_tree_pre_partial_partial_obliviously)
+ return true;
+ if (expr->kind == NARY)
+ {
+ vn_nary_op_t nary = PRE_EXPR_NARY (expr);
+ switch (nary->opcode)
+ {
+ tree name, cnst;
+ case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR:
+ case PLUS_EXPR: case MINUS_EXPR:
+
+ gcc_assert (nary->length == 2);
+ name = nary->op[0];
+ cnst = nary->op[1];
+ if (TREE_CODE (name) != SSA_NAME || has_single_use (name))
+ return true;
+ if (!is_gimple_min_invariant (cnst))
+ return true;
+ cost = 1;
+ break;
+ default:
+ return true;
+ }
+ }
+ else
+ return true;
+  return EDGE_COUNT (block->preds) * cost >= inserts_needed;
+}
+
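
A worked example of the closing comparison, with the GCC types replaced
by plain integers (editorial sketch, not from the patch; worth_inserting
is an invented name):

#include <stdbool.h>

/* Mirror of the heuristic's final test: a block with n_preds
   predecessors may receive up to n_preds * cost insertions.  For the
   cheap name-op-constant patterns above, cost is 1; any other
   expression returns true before this comparison is reached.  */
static bool
worth_inserting (unsigned n_preds, int cost, unsigned inserts_needed)
{
  return n_preds * cost >= inserts_needed;
}

/* E.g. for a cost-1 expression in a block with 3 predecessors:
   worth_inserting (3, 1, 2) is true, worth_inserting (3, 1, 4) false. */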
/* Perform insertion of partially redundant values.
For BLOCK, do the following:
@@ -3217,6 +3256,7 @@ do_regular_insertion (basic_block block, basic_block dom)
pre_expr *avail;
unsigned int val;
bool by_some = false;
+ unsigned int inserts_needed = 0;
bool cant_insert = false;
bool all_same = true;
pre_expr first_s = NULL;
@@ -3271,6 +3311,7 @@ do_regular_insertion (basic_block block, basic_block dom)
{
avail[bprime->index] = eprime;
all_same = false;
+ inserts_needed++;
}
else
{
@@ -3280,6 +3321,11 @@ do_regular_insertion (basic_block block, basic_block dom)
first_s = edoubleprime;
else if (!pre_expr_eq (first_s, edoubleprime))
all_same = false;
+ /* If the available value is not a NAME, PREing this
+ value will probably result in a copy on the edge
+ to assign the expression to a register. */
+ if (edoubleprime->kind != NAME)
+ inserts_needed++;
}
}
/* If we can insert it, it's not the same value
@@ -3288,9 +3334,28 @@ do_regular_insertion (basic_block block, basic_block dom)
partially redundant. */
if (!cant_insert && !all_same && by_some && dbg_cnt (treepre_insert))
{
- if (insert_into_preds_of_block (block, get_expression_id (expr),
- avail))
- new_stuff = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Found partial redundancy for expression ");
+ print_pre_expr (dump_file, expr);
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
+ }
+
+ /* If optimizing for size, insert at most one
+ new expression to avoid increasing code size. */
+ if (optimize_function_for_speed_p (cfun)
+ ? ppre_n_insert_for_speed_p (expr, block, inserts_needed)
+ : inserts_needed == 1)
+ new_stuff |=
+ insert_into_preds_of_block (block,
+ get_expression_id (expr),
+ avail);
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
+ optimize_function_for_speed_p (cfun)
+ ? "speed" : "size");
}
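
An editorial illustration of the size branch above (invented example,
not from the patch): with three predecessors and the value available on
only one incoming edge, two insertions would be needed to remove one
computation, a net code-size increase, so at -Os the candidate is
skipped.

/* a + b reaches the join from only one of three predecessors; making
   the final occurrence fully redundant would cost two insertions for
   one eliminated computation, which -Os declines.  */
int g (int a, int b, int sel)
{
  int x;
  switch (sel)
    {
    case 0:  x = a + b; break;  /* available on this edge  */
    case 1:  x = a;     break;  /* would need an insertion */
    default: x = b;     break;  /* would need an insertion */
    }
  return x + (a + b);
}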
/* If all edges produce the same value and that value is
an invariant, then the PHI has the same value on all
@@ -3419,9 +3484,28 @@ do_partial_partial_insertion (basic_block block, basic_block dom)
if (!cant_insert && by_all && dbg_cnt (treepre_insert))
{
pre_stats.pa_insert++;
- if (insert_into_preds_of_block (block, get_expression_id (expr),
- avail))
- new_stuff = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Found partial redundancy for expression ");
+ print_pre_expr (dump_file, expr);
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
+ }
+ /* Assuming the expression is 50% anticipatable, we have to
+ multiply the number of insertions needed by two for a cost
+ comparison. */
+ if (!optimize_function_for_speed_p (cfun)
+ || ppre_n_insert_for_speed_p (expr, block,
+ 2 * EDGE_COUNT (block->preds)))
+ new_stuff |=
+ insert_into_preds_of_block (block,
+ get_expression_id (expr),
+ avail);
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
+ optimize_function_for_speed_p (cfun)
+ ? "speed" : "size");
}
free (avail);
}
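
To spell out the arithmetic of the 50% assumption (editorial worked
example; worth_inserting is the invented mirror of the heuristic from
the earlier sketch):

#include <assert.h>

static int
worth_inserting (unsigned n_preds, int cost, unsigned inserts_needed)
{
  return n_preds * cost >= inserts_needed;
}

int
main (void)
{
  /* A block with 2 predecessors: the partial-partial path passes
     inserts_needed = 2 * EDGE_COUNT = 4, so a cost-1 expression fails
     the test (2 * 1 < 4); only expressions outside the cheap
     name-op-constant patterns, which return true unconditionally,
     are inserted here.  */
  assert (!worth_inserting (2, 1, 2 * 2));
  return 0;
}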
@@ -3462,7 +3546,9 @@ insert_aux (basic_block block)
if (!single_pred_p (block))
{
new_stuff |= do_regular_insertion (block, dom);
- if (do_partial_partial)
+ /* Don't bother with partial-partial redundancies when
+ optimizing for size. */
+ if (do_partial_partial && ! optimize_function_for_size_p (cfun))
new_stuff |= do_partial_partial_insertion (block, dom);
}
}
@@ -4213,11 +4299,11 @@ fini_pre (bool do_fre)
only wants to do full redundancy elimination. */
static unsigned int
-execute_pre (bool do_fre ATTRIBUTE_UNUSED)
+execute_pre (bool do_fre)
{
unsigned int todo = 0;
- do_partial_partial = optimize > 2;
+ do_partial_partial = flag_tree_pre_partial_partial;
/* This has to happen before SCCVN runs because
loop_optimizer_init may create new phis, etc. */
@@ -4290,19 +4376,20 @@ execute_pre (bool do_fre ATTRIBUTE_UNUSED)
return todo;
}
-/* Gate and execute functions for PRE. */
+/* Gate and execute functions for FRE/PRE. */
static unsigned int
do_pre (void)
{
- return TODO_rebuild_alias | execute_pre (false);
+ return TODO_rebuild_alias
+ | execute_pre (! flag_tree_pre);
}
static bool
gate_pre (void)
{
- /* PRE tends to generate bigger code. */
- return flag_tree_pre != 0 && optimize_function_for_speed_p (cfun);
+ /* Run FRE even if we don't run PRE. */
+ return (flag_tree_fre || flag_tree_pre);
}
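
The distinction behind the reworded gate, as an editorial sketch (not
from the patch): FRE alone removes computations that are redundant on
every path, while PRE's insertion machinery is needed once the
redundancy is only partial.

int fully_redundant (int a, int b)
{
  int x = a + b;
  int y = a + b;        /* redundant on every path: FRE handles this */
  return x + y;
}

int partially_redundant (int a, int b, int p)
{
  int x = p ? a + b : 0;
  return x + (a + b);   /* redundant only when p != 0: needs PRE */
}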
struct gimple_opt_pass pass_pre =