diff options
Diffstat (limited to 'gcc/tree-ssa-pre.c')
-rw-r--r-- | gcc/tree-ssa-pre.c | 127 |
1 files changed, 107 insertions, 20 deletions
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index 7773fe96b6f..f58ee94af62 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -104,6 +104,10 @@ along with GCC; see the file COPYING3. If not see In order to make it fully redundant, we insert the expression into the predecessors where it is not available, but is ANTIC. + When optimizing for size, we only eliminate the partial redundancy + if we need to insert in only one predecessor. This avoids almost + completely the code size increase that PRE usually causes. + For the partial anticipation case, we only perform insertion if it is partially anticipated in some block, and fully available in all of the predecessors. @@ -426,6 +430,7 @@ static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int, gimple); static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr); static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr); static void bitmap_set_copy (bitmap_set_t, bitmap_set_t); +static void bitmap_set_and (bitmap_set_t, bitmap_set_t); static bool bitmap_set_contains_value (bitmap_set_t, unsigned int); static void bitmap_insert_into_set (bitmap_set_t, pre_expr); static void bitmap_insert_into_set_1 (bitmap_set_t, pre_expr, bool); @@ -2978,13 +2983,6 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum, tree temp, res; gimple phi; - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Found partial redundancy for expression "); - print_pre_expr (dump_file, expr); - fprintf (dump_file, " (%04d)\n", val); - } - /* Make sure we aren't creating an induction variable. */ if (block->loop_depth > 0 && EDGE_COUNT (block->preds) == 2 && expr->kind != REFERENCE) @@ -3183,6 +3181,47 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum, } +/* Indicate if, when optimizing for speed, it is appropriate to make + INSERTS_NEEDED insertions in order to make EXPR in BLOCK redundant. */ +static bool +ppre_n_insert_for_speed_p (pre_expr expr, basic_block block, + unsigned int inserts_needed) +{ + /* The more expensive EXPR is, the more we should be prepared to insert + in the predecessors of BLOCK to make EXPR fully redundant. + For now, only recognize AND, OR, XOR, PLUS and MINUS of a multiple-use + SSA_NAME with a constant as cheap. */ + int cost; + + if (flag_tree_pre_partial_partial_obliviously) + return true; + if (expr->kind == NARY) + { + vn_nary_op_t nary = PRE_EXPR_NARY (expr); + switch (nary->opcode) + { + tree name, cnst; + case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: + case PLUS_EXPR: case MINUS_EXPR: + + gcc_assert (nary->length == 2); + name = nary->op[0]; + cnst = nary->op[1]; + if (TREE_CODE (name) != SSA_NAME || has_single_use (name)) + return true; + if (!is_gimple_min_invariant (cnst)) + return true; + cost = 1; + break; + default: + return true; + } + } + else + return true; + return EDGE_COUNT (block->preds) * cost >= inserts_needed; + +} /* Perform insertion of partially redundant values. For BLOCK, do the following: @@ -3217,6 +3256,7 @@ do_regular_insertion (basic_block block, basic_block dom) pre_expr *avail; unsigned int val; bool by_some = false; + unsigned int inserts_needed = 0; bool cant_insert = false; bool all_same = true; pre_expr first_s = NULL; @@ -3271,6 +3311,7 @@ do_regular_insertion (basic_block block, basic_block dom) { avail[bprime->index] = eprime; all_same = false; + inserts_needed++; } else { @@ -3280,6 +3321,11 @@ do_regular_insertion (basic_block block, basic_block dom) first_s = edoubleprime; else if (!pre_expr_eq (first_s, edoubleprime)) all_same = false; + /* If the available value is not a NAME, PREing this + value will probably result in a copy on the edge + to assign the expression to a register. */ + if (edoubleprime->kind != NAME) + inserts_needed++; } } /* If we can insert it, it's not the same value @@ -3288,9 +3334,28 @@ do_regular_insertion (basic_block block, basic_block dom) partially redundant. */ if (!cant_insert && !all_same && by_some && dbg_cnt (treepre_insert)) { - if (insert_into_preds_of_block (block, get_expression_id (expr), - avail)) - new_stuff = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "Found partial redundancy for expression "); + print_pre_expr (dump_file, expr); + fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr)); + } + + /* If optimizing for size, insert at most one + new expression to avoid increasing code size. */ + if (optimize_function_for_speed_p (cfun) + ? (1 || ppre_n_insert_for_speed_p (expr, block, inserts_needed)) + : EDGE_COUNT (block->preds) - inserts_needed == 1) + new_stuff |= + insert_into_preds_of_block (block, + get_expression_id (expr), + avail); + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not inserting (optimizing for %s)\n", + optimize_function_for_speed_p (cfun) + ? "speed" : "size"); + } /* If all edges produce the same value and that value is an invariant, then the PHI has the same value on all @@ -3419,9 +3484,28 @@ do_partial_partial_insertion (basic_block block, basic_block dom) if (!cant_insert && by_all && dbg_cnt (treepre_insert)) { pre_stats.pa_insert++; - if (insert_into_preds_of_block (block, get_expression_id (expr), - avail)) - new_stuff = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "Found partial redundancy for expression "); + print_pre_expr (dump_file, expr); + fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr)); + } + /* Assuming the expression is 50% anticipatable, we have to + multiply the number of insertions needed by two for a cost + comparison. */ + if (!optimize_function_for_speed_p (cfun) + || ppre_n_insert_for_speed_p (expr, block, + 2 * EDGE_COUNT (block->preds))) + new_stuff |= + insert_into_preds_of_block (block, + get_expression_id (expr), + avail); + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not inserting (optimizing for %s)\n", + optimize_function_for_speed_p (cfun) + ? "speed" : "size"); + } free (avail); } @@ -3462,7 +3546,9 @@ insert_aux (basic_block block) if (!single_pred_p (block)) { new_stuff |= do_regular_insertion (block, dom); - if (do_partial_partial) + /* Don't bother with partial-partial redundancies when + optimizing for size. */ + if (do_partial_partial && ! optimize_function_for_size_p (cfun)) new_stuff |= do_partial_partial_insertion (block, dom); } } @@ -4213,11 +4299,11 @@ fini_pre (bool do_fre) only wants to do full redundancy elimination. */ static unsigned int -execute_pre (bool do_fre ATTRIBUTE_UNUSED) +execute_pre (bool do_fre) { unsigned int todo = 0; - do_partial_partial = optimize > 2; + do_partial_partial = flag_tree_pre_partial_partial; /* This has to happen before SCCVN runs because loop_optimizer_init may create new phis, etc. */ @@ -4290,19 +4376,20 @@ execute_pre (bool do_fre ATTRIBUTE_UNUSED) return todo; } -/* Gate and execute functions for PRE. */ +/* Gate and execute functions for FRE/PRE. */ static unsigned int do_pre (void) { - return TODO_rebuild_alias | execute_pre (false); + return TODO_rebuild_alias + | execute_pre (! flag_tree_pre); } static bool gate_pre (void) { - /* PRE tends to generate bigger code. */ - return flag_tree_pre != 0 && optimize_function_for_speed_p (cfun); + /* Run FRE even if we don't run PRE. */ + return (flag_tree_fre || flag_tree_pre); } struct gimple_opt_pass pass_pre = |