aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFeng Xue <fxue@os.amperecomputing.com>2019-12-02 06:37:30 +0000
committerFeng Xue <fxue@os.amperecomputing.com>2019-12-02 06:37:30 +0000
commite1e48f0146c6ef6d839702cf59961c5a48333947 (patch)
tree7898f1998d7895c9f2958982aae053a808ebc318
parenta5fc5d9e9ee48138d09a357cc3b34c144e8616d4 (diff)
Enable recursive function versioning
2019-12-02 Feng Xue <fxue@os.amperecomputing.com> PR ipa/92133 * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option. (ipa-cp-min-recursive-probability): Likewise. * params.opt (ipa-cp-max-recursive-depth): New. (ipa-cp-min-recursive-probability): Likewise. * ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters val_p and unlimited. (self_recursively_generated_p): New function. (get_val_across_arith_op): Likewise. (propagate_vals_across_arith_jfunc): Add constant propagation for self-recursive function. (incorporate_penalties): Do not penalize pure self-recursive function. (good_cloning_opportunity_p): Dump node_is_self_scc flag. (propagate_constants_topo): Set node_is_self_scc flag for cgraph node. (get_info_about_necessary_edges): Relax hotness check for edge to self-recursive function. * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc. 2019-12-02 Feng Xue <fxue@os.amperecomputing.com> PR ipa/92133 * gcc.dg/ipa/ipa-clone-2.c: New test. git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@278893 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog20
-rw-r--r--gcc/doc/invoke.texi7
-rw-r--r--gcc/ipa-cp.c221
-rw-r--r--gcc/ipa-prop.h2
-rw-r--r--gcc/params.opt8
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c47
7 files changed, 283 insertions, 27 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9a807baa38e..0765ee2be12 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
+
+ PR ipa/92133
+ * doc/invoke.texi (ipa-cp-max-recursive-depth): Document new option.
+ (ipa-cp-min-recursive-probability): Likewise.
+ * params.opt (ipa-cp-max-recursive-depth): New.
+ (ipa-cp-min-recursive-probability): Likewise.
+ * ipa-cp.c (ipcp_lattice<valtype>::add_value): Add two new parameters
+ val_p and unlimited.
+ (self_recursively_generated_p): New function.
+ (get_val_across_arith_op): Likewise.
+ (propagate_vals_across_arith_jfunc): Add constant propagation for
+ self-recursive function.
+ (incorporate_penalties): Do not penalize pure self-recursive function.
+ (good_cloning_opportunity_p): Dump node_is_self_scc flag.
+ (propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
+ (get_info_about_necessary_edges): Relax hotness check for edge to
+ self-recursive function.
+ * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.
+
2019-12-01 Sandra Loosemore <sandra@codesourcery.com>
PR target/92499
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 403be47d893..d165f31a865 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12035,6 +12035,13 @@ IPA-CP calculates its own score of cloning profitability heuristics
and performs those cloning opportunities with scores that exceed
@option{ipa-cp-eval-threshold}.
+@item ipa-cp-max-recursive-depth
+Maximum depth of recursive cloning for self-recursive function.
+
+@item ipa-cp-min-recursive-probability
+Recursive cloning only when the probability of call being executed exceeds
+the parameter.
+
@item ipa-cp-recursion-penalty
Percentage penalty the recursive functions will receive when they
are evaluated for cloning.
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index d0c6e91edd4..693c7a2fdc5 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -228,7 +228,9 @@ public:
inline bool set_contains_variable ();
bool add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val = NULL,
- int src_idx = 0, HOST_WIDE_INT offset = -1);
+ int src_idx = 0, HOST_WIDE_INT offset = -1,
+ ipcp_value<valtype> **val_p = NULL,
+ bool unlimited = false);
void print (FILE * f, bool dump_sources, bool dump_benefits);
};
@@ -1817,22 +1819,32 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
meaning. OFFSET -1 means the source is scalar and not a part of an
- aggregate. */
+ aggregate. If non-NULL, VAL_P records address of existing or newly added
+ ipcp_value. UNLIMITED means whether value count should not exceed the limit
+ given by PARAM_IPA_CP_VALUE_LIST_SIZE. */
template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val,
- int src_idx, HOST_WIDE_INT offset)
+ int src_idx, HOST_WIDE_INT offset,
+ ipcp_value<valtype> **val_p,
+ bool unlimited)
{
- ipcp_value<valtype> *val;
+ ipcp_value<valtype> *val, *last_val = NULL;
+
+ if (val_p)
+ *val_p = NULL;
if (bottom)
return false;
- for (val = values; val; val = val->next)
+ for (val = values; val; last_val = val, val = val->next)
if (values_equal_for_ipcp_p (val->value, newval))
{
+ if (val_p)
+ *val_p = val;
+
if (ipa_edge_within_scc (cs))
{
ipcp_value_source<valtype> *s;
@@ -1847,7 +1859,7 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
return false;
}
- if (values_count == param_ipa_cp_value_list_size)
+ if (!unlimited && values_count == param_ipa_cp_value_list_size)
{
/* We can only free sources, not the values themselves, because sources
of other values in this SCC might point to them. */
@@ -1860,7 +1872,6 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
}
}
-
values = NULL;
return set_to_bottom ();
}
@@ -1868,11 +1879,81 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
values_count++;
val = allocate_and_init_ipcp_value (newval);
val->add_source (cs, src_val, src_idx, offset);
- val->next = values;
- values = val;
+ val->next = NULL;
+
+ /* Add the new value to end of value list, which can reduce iterations
+ of propagation stage for recursive function. */
+ if (last_val)
+ last_val->next = val;
+ else
+ values = val;
+
+ if (val_p)
+ *val_p = val;
+
+ return true;
+}
+
+/* Return true, if a ipcp_value VAL is orginated from parameter value of
+ self-feeding recursive function by applying non-passthrough arithmetic
+ transformation. */
+
+static bool
+self_recursively_generated_p (ipcp_value<tree> *val)
+{
+ class ipa_node_params *info = NULL;
+
+ for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
+ {
+ cgraph_edge *cs = src->cs;
+
+ if (!src->val || cs->caller != cs->callee->function_symbol ()
+ || src->val == val)
+ return false;
+
+ if (!info)
+ info = IPA_NODE_REF (cs->caller);
+
+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
+ src->index);
+ ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
+ : plats->aggs;
+ ipcp_value<tree> *src_val;
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ if (src_val == val)
+ break;
+
+ if (!src_val)
+ return false;
+ }
+
return true;
}
+/* A helper function that returns result of operation specified by OPCODE on
+ the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
+ value of SRC_VAL. If the operation is binary, OPND2 is a constant value
+ acting as its second operand. If non-NULL, RES_TYPE is expected type of
+ the result. */
+
+static tree
+get_val_across_arith_op (enum tree_code opcode,
+ tree opnd1_type,
+ tree opnd2,
+ ipcp_value<tree> *src_val,
+ tree res_type)
+{
+ tree opnd1 = src_val->value;
+
+ /* Skip source values that is incompatible with specified type. */
+ if (opnd1_type
+ && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
+ return NULL_TREE;
+
+ return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+}
+
/* Propagate values through an arithmetic transformation described by a jump
function associated with edge CS, taking values from SRC_LAT and putting
them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
@@ -1896,24 +1977,76 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs,
ipcp_value<tree> *src_val;
bool ret = false;
- /* Do not create new values when propagating within an SCC because if there
- are arithmetic functions with circular dependencies, there is infinite
- number of them and we would just make lattices bottom. If this condition
- is ever relaxed we have to detect self-feeding recursive calls in
- cgraph_edge_brings_value_p in a smarter way. */
+ /* Due to circular dependencies, propagating within an SCC through arithmetic
+ transformation would create infinite number of values. But for
+ self-feeding recursive function, we could allow propagation in a limited
+ count, and this can enable a simple kind of recursive function versioning.
+ For other scenario, we would just make lattices bottom. */
if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
- ret = dest_lat->set_contains_variable ();
+ {
+ int i;
+
+ if (src_lat != dest_lat || param_ipa_cp_max_recursive_depth < 1)
+ return dest_lat->set_contains_variable ();
+
+ /* No benefit if recursive execution is in low probability. */
+ if (cs->sreal_frequency () * 100
+ <= ((sreal) 1) * param_ipa_cp_min_recursive_probability)
+ return dest_lat->set_contains_variable ();
+
+ auto_vec<ipcp_value<tree> *, 8> val_seeds;
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ {
+ /* Now we do not use self-recursively generated value as propagation
+ source, this is absolutely conservative, but could avoid explosion
+ of lattice's value space, especially when one recursive function
+ calls another recursive. */
+ if (self_recursively_generated_p (src_val))
+ {
+ ipcp_value_source<tree> *s;
+
+ /* If the lattice has already been propagated for the call site,
+ no need to do that again. */
+ for (s = src_val->sources; s; s = s->next)
+ if (s->cs == cs)
+ return dest_lat->set_contains_variable ();
+ }
+ else
+ val_seeds.safe_push (src_val);
+ }
+
+ /* Recursively generate lattice values with a limited count. */
+ FOR_EACH_VEC_ELT (val_seeds, i, src_val)
+ {
+ for (int j = 1; j < param_ipa_cp_max_recursive_depth; j++)
+ {
+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+ src_val, res_type);
+ if (!cstval)
+ break;
+
+ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+ src_offset, &src_val, true);
+ gcc_checking_assert (src_val);
+ }
+ }
+ ret |= dest_lat->set_contains_variable ();
+ }
else
for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
- tree opnd1 = src_val->value;
- tree cstval = NULL_TREE;
-
- /* Skip source values that is incompatible with specified type. */
- if (!opnd1_type
- || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
- cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+ /* Now we do not use self-recursively generated value as propagation
+ source, otherwise it is easy to make value space of normal lattice
+ overflow. */
+ if (self_recursively_generated_p (src_val))
+ {
+ ret |= dest_lat->set_contains_variable ();
+ continue;
+ }
+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+ src_val, res_type);
if (cstval)
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
src_offset);
@@ -3038,7 +3171,7 @@ hint_time_bonus (ipa_hints hints)
static inline int64_t
incorporate_penalties (ipa_node_params *info, int64_t evaluation)
{
- if (info->node_within_scc)
+ if (info->node_within_scc && !info->node_is_self_scc)
evaluation = (evaluation
* (100 - param_ipa_cp_recursion_penalty)) / 100;
@@ -3082,7 +3215,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
count_sum.dump (dump_file);
fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
", threshold: %i\n",
- info->node_within_scc ? ", scc" : "",
+ info->node_within_scc
+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
evaluation, param_ipa_cp_eval_threshold);
}
@@ -3100,7 +3234,8 @@ good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
"size: %i, freq_sum: %i%s%s) -> evaluation: "
"%" PRId64 ", threshold: %i\n",
time_benefit, size_cost, freq_sum,
- info->node_within_scc ? ", scc" : "",
+ info->node_within_scc
+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
info->node_calling_single_call ? ", single_call" : "",
evaluation, param_ipa_cp_eval_threshold);
@@ -3594,14 +3729,30 @@ propagate_constants_topo (class ipa_topo_info *topo)
while (v)
{
struct cgraph_edge *cs;
+ class ipa_node_params *info = NULL;
+ bool self_scc = true;
for (cs = v->callees; cs; cs = cs->next_callee)
if (ipa_edge_within_scc (cs))
{
- IPA_NODE_REF (v)->node_within_scc = true;
+ cgraph_node *callee = cs->callee->function_symbol ();
+
+ if (v != callee)
+ self_scc = false;
+
+ if (!info)
+ {
+ info = IPA_NODE_REF (v);
+ info->node_within_scc = true;
+ }
+
if (propagate_constants_across_call (cs))
- push_node_to_stack (topo, cs->callee->function_symbol ());
+ push_node_to_stack (topo, callee);
}
+
+ if (info)
+ info->node_is_self_scc = self_scc;
+
v = pop_node_from_stack (topo);
}
@@ -3983,6 +4134,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
hot |= cs->maybe_hot_p ();
if (cs->caller != dest)
non_self_recursive = true;
+ else if (src->val)
+ gcc_assert (values_equal_for_ipcp_p (src->val->value,
+ val->value));
}
cs = get_next_cgraph_edge_clone (cs);
}
@@ -3996,6 +4150,19 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
*freq_sum = freq;
*count_sum = cnt;
*caller_count = count;
+
+ if (!hot && IPA_NODE_REF (dest)->node_within_scc)
+ {
+ struct cgraph_edge *cs;
+
+ /* Cold non-SCC source edge could trigger hot recursive execution of
+ function. Consider the case as hot and rely on following cost model
+ computation to further select right one. */
+ for (cs = dest->callers; cs; cs = cs->next_caller)
+ if (cs->caller == dest && cs->maybe_hot_p ())
+ return true;
+ }
+
return hot;
}
diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
index e9d6a5e8305..1958e1ee9cc 100644
--- a/gcc/ipa-prop.h
+++ b/gcc/ipa-prop.h
@@ -497,6 +497,8 @@ public:
unsigned node_dead : 1;
/* Node is involved in a recursion, potentionally indirect. */
unsigned node_within_scc : 1;
+ /* Node contains only direct recursion. */
+ unsigned node_is_self_scc : 1;
/* Node is calling a private function called only once. */
unsigned node_calling_single_call : 1;
/* False when there is something makes versioning impossible. */
diff --git a/gcc/params.opt b/gcc/params.opt
index 586b539ec5f..d88ae0c468b 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -198,6 +198,14 @@ Threshold ipa-cp opportunity evaluation that is still considered beneficial to c
Common Joined UInteger Var(param_ipa_cp_loop_hint_bonus) Init(64) Param
Compile-time bonus IPA-CP assigns to candidates which make loop bounds or strides known.
+-param=ipa-cp-max-recursive-depth=
+Common Joined UInteger Var(param_ipa_cp_max_recursive_depth) Init(8) Param
+Maximum depth of recursive cloning for self-recursive function.
+
+-param=ipa-cp-min-recursive-probability=
+Common Joined UInteger Var(param_ipa_cp_min_recursive_probability) Init(2) Param
+Recursive cloning only when the probability of call being executed exceeds the parameter.
+
-param=ipa-cp-recursion-penalty=
Common Joined UInteger Var(param_ipa_cp_recursion_penalty) Init(40) IntegerRange(0, 100) Param
Percentage penalty the recursive functions will receive when they are evaluated for cloning.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index edf8eadae03..bb36544cfe4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-12-02 Feng Xue <fxue@os.amperecomputing.com>
+
+ PR ipa/92133
+ * gcc.dg/ipa/ipa-clone-2.c: New test.
+
2019-12-01 Sandra Loosemore <sandra@codesourcery.com>
PR target/92499
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
new file mode 100644
index 00000000000..d513020ee8b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */
+
+int fn();
+
+int data[100];
+
+int recur_fn (int i)
+{
+ int j;
+
+ if (i == 6)
+ {
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ return 10;
+ }
+
+ data[i] = i;
+
+ for (j = 0; j < 100; j++)
+ recur_fn (i + 1);
+
+ return i;
+}
+
+int main ()
+{
+ int i;
+
+ for (i = 0; i < 100; i++)
+ recur_fn (1) + recur_fn (-5);
+
+ return 1;
+}
+
+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */