aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkelvin <kelvin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-23 19:54:53 +0000
committerkelvin <kelvin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-10-23 19:54:53 +0000
commit3e068742a36a4c1e426abb1bf51531dbdc5c279a (patch)
tree391d3b681ce0c1cc28a08c64006cb1fe067654eb
parent418b6d85e22af51e3291e0d81d4e3e1c340a6b57 (diff)
snapshot of code that works for first test case
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ibm/kelvin-1@229269 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/cfgloopmanip.c936
-rw-r--r--gcc/kelvin-debugs.c801
-rw-r--r--gcc/loop-unroll.c311
3 files changed, 2036 insertions, 12 deletions
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 1f9a2b38fe7..2c9c8d15e1e 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -34,6 +34,12 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-loop-manip.h"
#include "dumpfile.h"
+#define KELVIN_PATCH
+#define KELVIN_NOISE
+#ifdef KELVIN_NOISE
+#include "kelvin-debugs.c"
+#endif
+
static void copy_loops_to (struct loop **, int,
struct loop *);
static void loop_redirect_edge (edge, basic_block);
@@ -44,6 +50,532 @@ static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *, bitmap);
+#ifdef KELVIN_PATCH
+
+/**
+ * Issue a fatal error message and abort program execution.
+ */
+static void internal(const char *msg)
+{
+ fprintf(stderr, "Fatal internal error: %s\n", msg);
+ exit(-1);
+}
+
+/*
+ * Return true iff block is considered to reside within the loop
+ * represented by loop_ptr
+ */
+static bool
+in_loop_p(basic_block block, loop_p loop_ptr)
+{
+ basic_block *bbs = get_loop_body (loop_ptr);
+ bool result = false;
+
+ for (unsigned int i = 0; i < loop_ptr->num_nodes; i++)
+ {
+ if (bbs[i] == block)
+ result = true;
+ }
+ free (bbs);
+ return result;
+}
+
+
+/*
+ * Zero all frequencies associated with this loop.
+ */
+static void
+zero_loop_frequencies(loop_p loop_ptr)
+{
+ basic_block *bbs = get_loop_body (loop_ptr);
+ for (unsigned i = 0; i < loop_ptr->num_nodes; ++i)
+ {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "zero_loop_frequencies: ");
+ kdn_dump_block_id(stderr, bbs[i]);
+#endif
+ bbs[i]->frequency = 0;
+ }
+ free (bbs);
+}
+
+typedef struct block_ladder_rung {
+ basic_block block;
+ struct block_ladder_rung *lower_rung;
+} *ladder_rung_p;
+
+
+/* I'm not sure the same logical edge will always be represented
+ * by the same edge data object. So we will test equality by
+ * comparing source and dest values.
+ */
+static bool same_edge(edge an_edge, edge another_edge)
+{
+ return ((an_edge->src == another_edge->src) &&
+ (an_edge->dest == another_edge->dest));
+}
+
+/* Return true iff an_edge matches one of the nodes that is already
+ * present within set_of_edges.
+ */
+static bool in_edge_set(edge an_edge, vec<edge> set_of_edges)
+{
+ unsigned int j;
+ edge e;
+
+ FOR_EACH_VEC_ELT(set_of_edges, j, e)
+ {
+ if (same_edge(e, an_edge))
+ return true;
+ }
+ return false;
+}
+
+/* return true iff an_edge->dest is already represented within
+ * the ladder_rung.
+ */
+static bool in_call_chain(edge an_edge, ladder_rung_p ladder_rung)
+{
+ if (ladder_rung == NULL)
+ return false;
+ else if (an_edge->dest == ladder_rung->block)
+ return true;
+ else
+ return in_call_chain(an_edge, ladder_rung->lower_rung);
+}
+
+
+static void
+recursively_zero_frequency(loop_p loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung,
+ edge incoming_edge)
+{
+#ifdef KELVIN_NOISE
+ basic_block a_block = incoming_edge->dest;
+ fprintf(stderr, "recursively zero loop frequency for block %d, from %d\n",
+ a_block->index, a_block->frequency);
+#endif
+
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set(incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain(incoming_edge, ladder_rung))
+ return;
+ else {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " ---> actually doing the zentrification\n");
+#endif
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency = 0;
+ for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++)
+ {
+ edge successor = EDGE_SUCC(block, i);
+ recursively_zero_frequency(loop_ptr, exit_edges,
+ &a_rung, successor);
+ }
+ }
+}
+
+/* Return true iff candidate is contained within the loop represented
+ * by loop_header and loop_latch.
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header. (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * Note that if a candidate's success is the loop, then the candidate
+ * itself is also in the loop. If none of the successors are in the loop
+ */
+static bool _in_loop(basic_block candidate,
+ basic_block loop_header,
+ basic_block loop_latch,
+ bool start_of_recursion)
+{
+ if (candidate == loop_latch) {
+ return true;
+ } else if (candidate == loop_header) {
+ return start_of_recursion;
+ } else {
+ for (unsigned int i = 0; i < EDGE_COUNT(candidate->succs); i++) {
+ basic_block successor = EDGE_SUCC(candidate, i)->dest;
+ if (_in_loop(successor, loop_header, loop_latch, false))
+ return true;
+ }
+ return false; /* None of the successors was in loop */
+ }
+}
+
+static bool _in_loop_set(basic_block candidate, vec<basic_block> loop_set)
+{
+ unsigned int j;
+ basic_block b;
+
+ FOR_EACH_VEC_ELT(loop_set, j, b) {
+ if (b == candidate) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Add candidate into the results array at position count if candidate
+ * is in the loop and it is not already contained within the results
+ * array.
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header. (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * If and only if candidate is added to the results array, recursively
+ * do the same for each successor of candidate starting at the following
+ * position (count + 1) within the array.
+ *
+ * Abort with an internal error message if the number of blocks to be
+ * added into the results array exceeds the array's size.
+ *
+ * return the number of nodes stored within the results array
+ */
+static vec<basic_block> _recursively_get_loop_blocks(basic_block candidate,
+ vec<basic_block> results,
+ basic_block loop_header,
+ basic_block loop_latch)
+{
+ basic_block bb;
+ unsigned int u;
+
+ /* if candidate is already in the results array, then we're done */
+ FOR_EACH_VEC_ELT(results, u, bb) {
+ if (bb == candidate)
+ return results;
+ }
+
+ if (_in_loop(candidate, loop_header, loop_latch, true)) {
+ results.safe_push(candidate);
+ for (unsigned int u = 0; u < EDGE_COUNT(candidate->succs); u++) {
+ edge successor = EDGE_SUCC(candidate, u);
+ if (successor->probability != 0) {
+ results = _recursively_get_loop_blocks(successor->dest, results,
+ loop_header, loop_latch);
+ }
+ }
+ }
+
+ return results;
+}
+
+
+static vec<basic_block> _get_loop_blocks(loop_p loop_ptr)
+{
+ vec<basic_block> results;
+
+ results = vNULL;
+ results = _recursively_get_loop_blocks(loop_ptr->header, results,
+ loop_ptr->header, loop_ptr->latch);
+ return results;
+}
+
+static bool _in_block_set(basic_block block, vec<basic_block> block_set)
+{
+ basic_block bb;
+ unsigned int u;
+ FOR_EACH_VEC_ELT(block_set, u, bb) {
+ if (bb == block)
+ return true;
+ }
+ return false;
+}
+
+static vec<edge> _get_loop_exit_edges(vec<basic_block> loop_blocks) {
+ basic_block bb;
+ unsigned int u;
+ edge e;
+ vec<edge> results = vNULL;
+
+ FOR_EACH_VEC_ELT(loop_blocks, u, bb) {
+ for (unsigned int i = 0; i < EDGE_COUNT(bb->succs); i++) {
+ edge successor = EDGE_SUCC(bb, i);
+ basic_block edge_dest = successor->dest;
+
+ if (!_in_block_set(edge_dest, loop_blocks)) {
+ results.safe_push(successor);
+ }
+ }
+ }
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "_get_loop_exit_edges() is returning this set of edges\n");
+ FOR_EACH_VEC_ELT(results, u, e) {
+ kdn_dump_edge(stderr, e, true, true);
+ }
+ fprintf(stderr, " end of edge set\n");
+#endif
+ return results;
+}
+
+/*
+ * Zero all frequencies for all blocks contained within the loop
+ * represented by loop_ptr which are reachable from block without
+ * passing through the block header. If block is not within the loop,
+ * this has no effect. The behavior is as outlined by the following
+ * algorithm:
+ *
+ * If block is contained within loop:
+ * Set block's frequency to zero
+ * Using a depth-first traversal, do the same for each successor
+ * transitively, stopping the recursive traversal if:
+ * the current block is the loop header, or
+ * the current block resides outside the loop, or
+ * the current block has already been visited in this depth-first
+ * traversal.
+ */
+static void
+zero_partial_loop_frequencies(loop_p loop_ptr, basic_block block)
+{
+ /* When zero_partial_loop_frequencies is invoked, the *loop_ptr
+ * object is not entirely coherent, so existing library services
+ * get_loop_blocks() and get_loop_exit_edges() cannot be called
+ * from this context. Instead, we use the _get_loop_blocks()
+ * and _get_loop_exit_edges() functions which assume only
+ * the validity of loop_ptr->loop_header, loop_ptr->loop_latch,
+ * and valid successor and predecessor information for each
+ * block contained within the loop.
+ */
+ vec<basic_block> loop_blocks = _get_loop_blocks(loop_ptr);
+ if (_in_block_set(block, loop_blocks)) {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+ basic_block header;
+
+ vec<edge> exit_edges = _get_loop_exit_edges(loop_blocks);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "zeroing loop frequency for block %d, from %d\n",
+ block->index, block->frequency);
+#endif
+ block->frequency = 0;
+ for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) {
+ edge successor = EDGE_SUCC(block, i);
+ if (successor->probability != 0) {
+ recursively_zero_frequency(loop_ptr, exit_edges,
+ &ladder_rung, successor);
+ }
+ }
+ exit_edges.release();
+ }
+ loop_blocks.release();
+}
+
+static void
+recursively_increment_frequency(loop_p loop_ptr, vec<edge> exit_edges,
+ ladder_rung_p ladder_rung,
+ edge incoming_edge,
+ int frequency_increment)
+{
+#ifdef KELVIN_NOISE
+ basic_block a_block = incoming_edge->dest;
+ fprintf(stderr, "recursively incrementing loop frequency for block %d, from %d by %d\n",
+ a_block->index, a_block->frequency, frequency_increment);
+#endif
+
+ if (incoming_edge->dest == loop_ptr->header)
+ return;
+ else if (in_edge_set(incoming_edge, exit_edges))
+ return;
+ else if (in_call_chain(incoming_edge, ladder_rung))
+ return;
+ /* right here, I think I need a short-circuit evaluator to return if
+ a_block is not inside the loop
+
+ so why didn't my test for exit_edges membership work?
+
+ in the trace, in_block_set(7) fails, but then I acdtually do an
+ increment for block 7.
+
+ */
+ else {
+ struct block_ladder_rung a_rung;
+ basic_block block = incoming_edge->dest;
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " ---> actually doing the increment\n");
+#endif
+ a_rung.block = block;
+ a_rung.lower_rung = ladder_rung;
+ block->frequency += frequency_increment;
+ for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) {
+ edge successor = EDGE_SUCC(block, i);
+ int successor_increment =
+ (frequency_increment * successor->probability) / REG_BR_PROB_BASE;
+
+ recursively_increment_frequency(loop_ptr, exit_edges,
+ &a_rung, successor,
+ successor_increment);
+ }
+ }
+}
+
+/*
+ * If block is contained within loop, we do the following:
+ * Add incremental_frequency (which may be negative) to
+ * block->frequency and propogate this change to all successors of
+ * block that reside within the loop, transitively. Use a depth-first
+ * tree traversal, stopping the recursion at the loop header, at any
+ * successor block that resides outside the loop, and at any block
+ * that is already part of the current depth-first traversal.
+ */
+static void increment_loop_frequencies(loop_p loop_ptr,
+ basic_block block,
+ int frequency_increment)
+{
+
+ vec<basic_block> loop_blocks = _get_loop_blocks(loop_ptr);
+
+#ifdef KELVIN_NOISE
+ {
+ unsigned int j;
+ basic_block b;
+ fprintf(stderr, "increment_loop_frequencies block %d with increment: %d\n",
+ block->index, frequency_increment);
+ fprintf(stderr, " the loop blocks consist of:\n");
+ FOR_EACH_VEC_ELT(loop_blocks, j, b) {
+ fprintf(stderr, " block %d\n", b->index);
+ }
+ }
+#endif
+ if (_in_loop_set(block, loop_blocks)) {
+ struct block_ladder_rung ladder_rung;
+ ladder_rung.block = block;
+ ladder_rung.lower_rung = NULL;
+ basic_block header;
+
+ vec<edge> exit_edges = _get_loop_exit_edges(loop_blocks);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "incrementing loop frequency for block %d, from %d by %d\n",
+ block->index, block->frequency, frequency_increment);
+#endif
+ block->frequency += frequency_increment;
+ for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) {
+ edge successor = EDGE_SUCC(block, i);
+ if (successor->probability != 0) {
+ int successor_increment =
+ ((frequency_increment * successor->probability) /
+ REG_BR_PROB_BASE);
+
+ recursively_increment_frequency(loop_ptr, exit_edges,
+ &ladder_rung, successor,
+ successor_increment);
+ }
+ }
+ exit_edges.release();
+ }
+#ifdef KELVIN_NOISE
+ else {
+ fprintf(stderr, " block %d was not in the loop set!\n", block->index);
+ }
+#endif
+ loop_blocks.release();
+}
+
+/*
+ * check_loop_frequency_integrity enforces that:
+ *
+ * a. The sum of outgoing edge frequencies for the loop equals the
+ * sum of incoming edge frequencies for the loop header block.
+ *
+ * b. The sum of predecessor edge frequencies for every block
+ * in the loop equals the frequency of that block.
+ */
+static void check_loop_frequency_integrity(loop_p loop_ptr)
+{
+ unsigned int i, j, k;
+ basic_block a_block;
+
+ vec<basic_block> loop_body = _get_loop_blocks(loop_ptr);
+ basic_block header;
+
+ FOR_EACH_VEC_ELT(loop_body, k, a_block) {
+ int delta;
+ int predecessor_frequencies = 0;
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Enforcing pred frequency coherence for "
+ "block %d, with freq: %d\n",
+ a_block->index, a_block->frequency);
+#endif
+ for (j = 0; j < EDGE_COUNT(a_block->preds); j++) {
+ edge a_predecessor = EDGE_PRED(a_block, j);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " predecessor ");
+ kdn_dump_edge(stderr, a_predecessor, true, true);
+ fprintf(stderr,
+ " which has frequency %d\n", EDGE_FREQUENCY(a_predecessor));
+#endif
+ predecessor_frequencies += EDGE_FREQUENCY(a_predecessor);
+ }
+ delta = predecessor_frequencies - a_block->frequency;
+ if (delta < 0)
+ delta = -delta;
+
+ if (delta > 10) {
+ internal("predecessor frequencies confused while unrolling loop");
+ }
+ }
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Comparing loop incoming and outgoing frequencies\n");
+#endif
+ header = loop_ptr->header;
+ int incoming_frequency = 0;
+ for (i = 0; i < EDGE_COUNT(header->preds); i++) {
+ edge a_predecessor = EDGE_PRED(header, i);
+
+ if (!_in_loop_set (a_predecessor->src, loop_body)) {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Enforcing coherence, loop predecessor: ");
+ kdn_dump_edge(stderr, a_predecessor, true, true);
+ fprintf(stderr, " which has frequency: %d\n",
+ EDGE_FREQUENCY(a_predecessor));
+#endif
+ incoming_frequency += EDGE_FREQUENCY(a_predecessor);
+ }
+ }
+
+ int outgoing_frequency = 0;
+ vec<edge> exit_edges = _get_loop_exit_edges(loop_body);
+ edge edge;
+ FOR_EACH_VEC_ELT (exit_edges, i, edge) {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Enforcing coherence, loop exit: ");
+ kdn_dump_edge(stderr, edge, true, true);
+ fprintf(stderr, " which has frequency: %d\n", EDGE_FREQUENCY(edge));
+#endif
+ outgoing_frequency += EDGE_FREQUENCY(edge);
+ }
+
+ int delta = incoming_frequency - outgoing_frequency;
+ if (delta < 0)
+ delta = -delta;
+
+ if (delta > 10) {
+ internal("Incoherent frequencies while unrolling loop");
+ }
+ loop_body.release();
+ exit_edges.release();
+}
+
+#endif
+
/* Checks whether basic block BB is dominated by DATA. */
static bool
rpe_enum_p (const_basic_block bb, const void *data)
@@ -1101,7 +1633,11 @@ can_duplicate_loop_p (const struct loop *loop)
is redistributed evenly to the remaining edges coming from E->src. */
static void
+#ifdef KELVIN_PATCH
+set_zero_probability (loop_p loop_ptr, edge e)
+#else
set_zero_probability (edge e)
+#endif
{
basic_block bb = e->src;
edge_iterator ei;
@@ -1109,6 +1645,12 @@ set_zero_probability (edge e)
unsigned n = EDGE_COUNT (bb->succs);
gcov_type cnt = e->count, cnt1;
unsigned prob = e->probability, prob1;
+#ifdef KELVIN_PATCH
+ int original_edge_frequency;
+ int new_edge_frequency;
+ int change_in_edge_frequency;
+ bool edge_originates_in_loop = in_loop_p (bb, loop_ptr);
+#endif
gcc_assert (n > 1);
cnt1 = cnt / (n - 1);
@@ -1118,18 +1660,78 @@ set_zero_probability (edge e)
{
if (ae == e)
continue;
-
+
+#ifdef KELVIN_PATCH
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY(ae);
+ ae->probability += prob1;
+ ae->count += cnt1;
+ new_edge_frequency = EDGE_FREQUENCY(ae);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+
+ increment_loop_frequencies(loop_ptr, ae->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ ae->probability += prob1;
+ ae->count += cnt1;
+ }
+#else
ae->probability += prob1;
ae->count += cnt1;
+#endif
last = ae;
}
-
+
/* Move the rest to one of the edges. */
+#ifdef KELVIN_PATCH
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY(last);
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ new_edge_frequency = EDGE_FREQUENCY(last);
+ change_in_edge_frequency = new_edge_frequency - original_edge_frequency;
+ if (change_in_edge_frequency != 0)
+ {
+ increment_loop_frequencies(loop_ptr, last->dest,
+ change_in_edge_frequency);
+ }
+ }
+ else
+ {
+ last->probability += prob % (n - 1);
+ last->count += cnt % (n - 1);
+ }
+#else
last->probability += prob % (n - 1);
last->count += cnt % (n - 1);
+#endif
+#ifdef KELVIN_PATCH
+ if (edge_originates_in_loop)
+ {
+ original_edge_frequency = EDGE_FREQUENCY(e);
+ e->probability = 0;
+ e->count = 0;
+ new_edge_frequency = EDGE_FREQUENCY(e);
+ change_in_edge_frequency =
+ new_edge_frequency - original_edge_frequency;
+ increment_loop_frequencies(loop_ptr, e->dest,
+ change_in_edge_frequency);
+ }
+ else
+ {
+ e->probability = 0;
+ e->count = 0;
+ }
+#else
e->probability = 0;
e->count = 0;
+#endif
}
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
@@ -1185,6 +1787,14 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
gcc_assert (bbs[0] == loop->header);
gcc_assert (bbs[n - 1] == loop->latch);
+#ifdef KELVIN_NOISE
+ /* We copy everything in the loop up to (and including?) the loop header.
+ */
+ fprintf(stderr,
+ "duplicate_loop_to_header_edge(), iterations: %d, the loop is:\n",
+ ndupl);
+ kdn_dump_loop(stderr, loop);
+#endif
/* Check whether duplication is possible. */
if (!can_copy_bbs_p (bbs, loop->num_nodes))
{
@@ -1192,6 +1802,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
return false;
}
new_bbs = XNEWVEC (basic_block, loop->num_nodes);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " the edge to loop header (dest of edge) is:\n");
+ kdn_dump_edge(stderr, e, TRUE, TRUE);
+ if (orig) {
+ fprintf(stderr,
+ " the orig edge (which is an edge that leaves the loop) is:\n");
+ kdn_dump_edge(stderr, orig, TRUE, TRUE);
+ } else {
+ fprintf(stderr, " no known loop exit, as orig equals NULL\n");
+ }
+#endif
/* In case we are doing loop peeling and the loop is in the middle of
irreducible region, the peeled copies will be inside it too. */
@@ -1200,6 +1821,11 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
/* Find edge from latch. */
latch_edge = loop_latch_edge (loop);
+#ifdef KELVIN_NOISE
+ /* the latch edge is the back edge that branches back to the loop header */
+ fprintf(stderr, "loop_latch_edge is:\n");
+ kdn_dump_edge(stderr, latch_edge, TRUE, TRUE);
+#endif
if (flags & DLTHE_FLAG_UPDATE_FREQ)
{
@@ -1207,16 +1833,56 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
of duplicated loop bodies. */
freq_in = header->frequency;
freq_le = EDGE_FREQUENCY (latch_edge);
+#ifdef KELVIN_NOISE
+ /* Note that the preheader dominates the header. All paths into
+ * the loop pass through the pre-header
+ */
+ fprintf(stderr,
+ "initial header (freq_in) frequency: %d, freq_le: %d\n",
+ freq_in, freq_le);
+ /* Kelvin doesn't understand the following few lines.
+ * It seems these are handling various "broken" aspects of other
+ * compiler phases.
+ */
+#endif
if (freq_in == 0)
freq_in = 1;
if (freq_in < freq_le)
freq_in = freq_le;
freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "freq_out_orig: %d, EDGE_FREQUENCY(orig): %d\n",
+ freq_out_orig, EDGE_FREQUENCY(orig));
+#endif
if (freq_out_orig > freq_in - freq_le)
freq_out_orig = freq_in - freq_le;
+#ifdef KELVIN_NOISE
+ /* kelvin has struggled to understand the significance of "orig"
+ * and of freq_out_orig. Here's how this works:
+ * freq_in is the frequency of the loop header
+ * freq_le is the frequency of the latch edge, which iterates
+ * the loop.
+ * the difference between freq_in and freq_le is the
+ * cumulative frequency of exit edges from the loop. By the
+ * "law of preservation of mass", what goes out must have
+ * come in. So freq_in - freq_le equals freq_loop_exit,
+ * which also equals freq_entry_into_header. And this is the
+ * frequency we choose to use for each of the new blocks
+ * that we create during our "duplication activities".
+ *
+ */
+ fprintf(stderr, " refined freq_out_orig: %d\n", freq_out_orig);
+#endif
prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
prob_pass_wont_exit =
RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
+#ifdef KELVIN_NOISE
+ /* kelvin thinks pass_thru means we exit the loop, wont_exit
+ * means we continue execution of the loop.
+ */
+ fprintf(stderr, "prob_pass_thru: %d, prob_pass_wont_exit: %d\n",
+ prob_pass_thru, prob_pass_wont_exit);
+#endif
if (orig
&& REG_BR_PROB_BASE - orig->probability != 0)
@@ -1233,14 +1899,35 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
&& dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
bitmap_set_bit (bbs_to_scale, i);
}
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Dealing with blocks dominated by a removed exit edge.\n");
+ fprintf(stderr, " the loop has %d blocks\n", n);
+ fprintf(stderr, " scale_after_exit: %d\n", scale_after_exit);
+ for (unsigned int i = 0; i < n; i++)
+ {
+ fprintf(stderr, " block[%d] of the loop will%s be scaled\n",
+ i, bitmap_bit_p (bbs_to_scale, i)? "": " not");
+ }
+#endif
}
scale_step = XNEWVEC (int, ndupl);
-
+
for (i = 1; i <= ndupl; i++)
scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
? prob_pass_wont_exit
: prob_pass_thru;
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Making %d duplicates\n", ndupl);
+ for (i = 1; i <= ndupl; i++) {
+ fprintf(stderr, " copy %d will%s exit\n", i,
+ bitmap_bit_p(wont_exit, i)? " not": "");
+ fprintf(stderr, " scale_step for this copy is %d\n",
+ scale_step[i - 1]);
+ }
+#endif
+
/* Complete peeling is special as the probability of exit in last
copy becomes 1. */
@@ -1248,6 +1935,15 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
{
int wanted_freq = EDGE_FREQUENCY (e);
+#ifdef KELVIN_NOISE
+ /* kelvin thinks "complete peeling" means there is no longer
+ * any iteration. For example, unrolling a loop 4 times, if
+ * the total iteration count is 3 will "completely peel".
+ */
+ fprintf(stderr,
+ "doing complette_peel, wanted_frequency: %d, freq_in: %d\n",
+ wanted_freq, freq_in);
+#endif
if (wanted_freq > freq_in)
wanted_freq = freq_in;
@@ -1257,11 +1953,18 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
should've managed the flags so all except for original loop
has won't exist set. */
scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "scale_act is %d, ndupl: %d\n", scale_act, ndupl);
+#endif
/* Now simulate the duplication adjustments and compute header
frequency of the last copy. */
for (i = 0; i < ndupl; i++)
wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "after looping, wanted_freq: %d, scale_main: %d\n",
+ wanted_freq, scale_main);
+#endif
}
else if (is_latch)
{
@@ -1277,6 +1980,10 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
}
scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
scale_act = combine_probabilities (scale_main, prob_pass_main);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "is_latch: scale_main is %d, scale_act: %d\n",
+ scale_main, scale_act);
+#endif
}
else
{
@@ -1284,6 +1991,10 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
for (i = 0; i < ndupl; i++)
scale_main = combine_probabilities (scale_main, scale_step[i]);
scale_act = REG_BR_PROB_BASE - prob_pass_thru;
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "!is_latch: scale_main is %d, scale_act: %d\n",
+ scale_main, scale_act);
+#endif
}
for (i = 0; i < ndupl; i++)
gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
@@ -1293,15 +2004,25 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
/* Loop the new bbs will belong to. */
target = e->src->loop_father;
-
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "The newly copied blocks will belong to this (target) loop:\n");
+ kdn_dump_loop(stderr, target);
+#endif
/* Original loops. */
n_orig_loops = 0;
for (aloop = loop->inner; aloop; aloop = aloop->next)
n_orig_loops++;
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " Number of loops immediately nested within the copied loop: %d\n",
+ n_orig_loops);
+#endif
orig_loops = XNEWVEC (struct loop *, n_orig_loops);
for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
orig_loops[i] = aloop;
-
+
set_loop_copy (loop, target);
first_active = XNEWVEC (basic_block, n);
@@ -1310,19 +2031,109 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
memcpy (first_active, bbs, n * sizeof (basic_block));
first_active_latch = latch;
}
-
+
spec_edges[SE_ORIG] = orig;
spec_edges[SE_LATCH] = latch_edge;
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Before making %d copies of the loop body, method consists of\n", ndupl);
+ kdn_dump_all_blocks(stderr, loop);
+#endif
+
+#ifdef KELVIN_PATCH
+ /* recompute the loop body frequencies */
+ zero_loop_frequencies (loop);
+
+ basic_block my_header = loop->header;
+ int sum_incoming_frequencies = 0;
+ for (unsigned int i = 0; i < EDGE_COUNT(my_header->preds); i++)
+ {
+ edge predecessor = EDGE_PRED(my_header, i);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " computing incoming frequency by accumulating %d from edge ",
+ EDGE_FREQUENCY(predecessor));
+ kdn_dump_edge(stderr, predecessor, true, true);
+#endif
+
+ if (in_loop_p(predecessor->src, loop))
+ sum_incoming_frequencies += EDGE_FREQUENCY(predecessor);
+ else
+ sum_incoming_frequencies +=
+ (int) (EDGE_FREQUENCY(predecessor) * 111111 + 5000) / 10000;
+ }
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "After zeroing loop frequencies, increment header with %d\n",
+ sum_incoming_frequencies);
+#endif
+ increment_loop_frequencies(loop, header, sum_incoming_frequencies);
+#endif
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "After recomputing loop frequencies in preparation for making copies\n");
+ kdn_dump_all_blocks(stderr, loop);
+#endif
place_after = e->src;
for (j = 0; j < ndupl; j++)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Making copy %d, placing copy after block: ", j);
+ kdn_dump_block_id(stderr, place_after);
+ /* kelvin has a few questions:
+
+after making some changes, the problem is still right here.
+
+the "problem" is that the copy I make of B5 and B6 to become B24 and
+B25, and they have copied frequencies 7711 and 7017 respectively. But
+that's not right, because the sole predecessor to B24 is B6, and B6
+frequency is 7017, so it doesn't make sense that B24 frequency would
+be 7711.
+
+When B5 and B6 are copied, B5 still has the 9% out-edge to B7, which
+explains why B6 has a lower frequency than B5.
+
+so kelvin's new insight (10/7/2015) is that I should copy the loops
+with zero frequency and then propagate the frequency of the target to
+its ancestors
+
+
+
+ * How do B5 and B6 end up with frequencies 7536 and 6858 respectively?
+ * (in the original unrolled loop, they had frequencies 9100 and 8281)
+ * --> kelvin says they should have frequency 7711
+ * When/how do we remove the out-going edge from B5 to B7?
+ * (the original unrolled loop has a loop-exit mode from B5 to B7
+ * but this edge does not appear in unrolled loop.)
+ * How is it that only the third copy of the loop body gets
+ * a loop-exiting edge from B28 to B7?? (note that the loop
+ * frequencies as generated do properly account for the
+ * outgoing edge from B28 to B29, because B28 has frequency
+ * 7536 and B29 has frequency 6858 and the exit edge frequency
+ * is 678 = 7536 - 6858.
+ */
+#endif
+
/* Copy loops. */
copy_loops_to (orig_loops, n_orig_loops, target);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Immediately before copy_bbs, place_after is:\n");
+ kdn_dump_block_id(stderr, place_after);
+ fprintf(stderr, " and method contents is\n");
+ kdn_dump_all_blocks(stderr, loop);
+#endif
/* Copy bbs. */
copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
- place_after, true);
+ place_after, true);
+
+#ifdef KELVIN_PATCH
+ int place_after_frequency = place_after->frequency;
+ basic_block saved_place_after = place_after;
+#endif
+
place_after = new_spec_edges[SE_LATCH]->src;
if (flags & DLTHE_RECORD_COPY_NUMBER)
@@ -1352,8 +2163,12 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
}
for (i = 0; i < n; i++)
new_bbs[i]->flags &= ~BB_DUPLICATED;
- }
-
+ }
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "In loop making copies of the loop body\n");
+ fprintf(stderr, " is_latch: %d, bitmap_bit_p(wont_exit, j+1): %ld\n",
+ is_latch, bitmap_bit_p(wont_exit, j+1));
+#endif
/* Redirect the special edges. */
if (is_latch)
{
@@ -1373,20 +2188,58 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
e = new_spec_edges[SE_LATCH];
}
+
+#ifdef KELVIN_PATCH
+ zero_partial_loop_frequencies(loop, saved_place_after);
+ increment_loop_frequencies(loop,
+ saved_place_after, place_after_frequency);
+#else
+#endif
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Immediately after copy_bbs, three levels @ place_after:\n");
+ kdn_dump_block_flow(stderr, 3, saved_place_after);
+#endif
+
/* Record exit edge in this copy. */
if (orig && bitmap_bit_p (wont_exit, j + 1))
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " bitmap_bit_p(wont_exit, j+1) is TRUE, to_remove is %s\n",
+ (to_remove == NULL)? "NULL": "not NULL");
+#endif
if (to_remove)
- to_remove->safe_push (new_spec_edges[SE_ORIG]);
+ {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Arranging to remove this edge\n");
+ kdn_dump_edge(stderr, new_spec_edges[SE_ORIG], TRUE, TRUE);
+#endif
+ to_remove->safe_push (new_spec_edges[SE_ORIG]);
+ }
+#ifdef KELVIN_PATCH
+ set_zero_probability (loop, new_spec_edges[SE_ORIG]);
+#else
set_zero_probability (new_spec_edges[SE_ORIG]);
-
+#endif
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
{
EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " before scaling new_bbs, ");
+ fprintf(stderr, "i is %d, block no: %d, frequency: %d\n",
+ i, new_bbs[i]->index, new_bbs[i]->frequency);
+#endif
scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
REG_BR_PROB_BASE);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " after scaling, ");
+ fprintf(stderr, "i is %d, block no: %d, frequency: %d\n",
+ i, new_bbs[i]->index, new_bbs[i]->frequency);
+#endif
}
}
}
@@ -1399,30 +2252,79 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
first_active_latch = new_bbs[n - 1];
}
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Having made the copy, do we scale the frequency: %s\n",
+ (flags & DLTHE_FLAG_UPDATE_FREQ)? "yes": "no");
+#endif
/* Set counts and frequencies. */
if (flags & DLTHE_FLAG_UPDATE_FREQ)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " scale_step for this copy is %d\n", scale_step[j]);
+#endif
scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
scale_act = combine_probabilities (scale_act, scale_step[j]);
}
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "After making copy %d of the loop body, method consists of\n", j);
+ kdn_dump_all_blocks(stderr, loop);
+ fprintf(stderr, "Adjusting the place_after block to: ");
+ kdn_dump_block_id(stderr, place_after);
+#endif
}
free (new_bbs);
free (orig_loops);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Done making copies of the loop body\n");
+#endif
+
/* Record the exit edge in the original loop body, and update the frequencies. */
if (orig && bitmap_bit_p (wont_exit, 0))
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " bitmap_bit_p(wont_exit, 0) is TRUE, to_remove is %s\n",
+ (to_remove == NULL)? "NULL": "not NULL");
+#endif
if (to_remove)
- to_remove->safe_push (orig);
+ {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Also arranging to remove this edge\n");
+ kdn_dump_edge(stderr, orig, TRUE, TRUE);
+#endif
+ to_remove->safe_push (orig);
+ }
+#ifdef KELVIN_PATCH
+ set_zero_probability (loop, orig);
+#else
set_zero_probability (orig);
+#endif
/* Scale the frequencies of the blocks dominated by the exit. */
if (bbs_to_scale)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " scaling bbs frequencies, orig and bitmap_bit_p: %d\n",
+ scale_after_exit);
+#endif
EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " before scaling, i is %d, block no: %d, frequency: %d\n",
+ i, bbs[i]->index, bbs[i]->frequency);
+#endif
scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
REG_BR_PROB_BASE);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ " i is %d, block no: %d, scaled_frequency is: %d\n",
+ i, bbs[i]->index, bbs[i]->frequency);
+#endif
}
}
}
@@ -1432,6 +2334,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
if (flags & DLTHE_FLAG_UPDATE_FREQ)
{
+#ifdef KELVIN_NOISE
+ fprintf(stderr, " scaling bbs frequencies main: %d\n", scale_main);
+#endif
scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
free (scale_step);
}
@@ -1462,6 +2367,13 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e,
free (bbs);
BITMAP_FREE (bbs_to_scale);
+#ifdef KELVIN_PATCH
+ /* This function call is strictly paranoia. it makes no changes
+ * to the data structures.
+ */
+ check_loop_frequency_integrity(loop);
+#endif
+
return true;
}
diff --git a/gcc/kelvin-debugs.c b/gcc/kelvin-debugs.c
new file mode 100644
index 00000000000..94a6e1fd2ac
--- /dev/null
+++ b/gcc/kelvin-debugs.c
@@ -0,0 +1,801 @@
+
+/*
+ * I'm just going to include this source file into the module that I
+ * want to debug. Presumably, that outer module will provide all of
+ * the necessary includes.
+
+#include <stdio.h>
+
+
+#include "system.h"
+#include "coretypes.h"
+
+#include "basic-block.h"
+#include "et-forest.h"
+#include "cfgloop.h"
+*/
+
+#include "pretty-print.h"
+#include "et-forest.h"
+#include "gimple.h"
+#include "input.h"
+#include "line-map.h"
+
+typedef const rtx_insn *const_ins;
+
+
+/* note: rtl.h declares:
+ * extern void print_insn(pretty_printer *, const_rtx, int)
+ * but that resolves to:
+ * extern void print_insn(pretty_printer *, rtx_def const *, int)
+ * and rtx_def is a superclass of rtx_insn. This results in an
+ * unresolved reference at link time.
+ *
+ * extern void print_insn(pretty_printer *, const rtx_insn *, int);
+ */
+
+static void kdn_dump_loop_id(FILE *, loop_p);
+static void kdn_dump_loop(FILE *, loop_p);
+static void kdn_dump_block_id(FILE *, basic_block);
+static void kdn_dump_block(FILE *, basic_block);
+static void kdn_dump_block_flow(FILE *, int, basic_block);
+static void kdn_dump_bb_flags(FILE *, int);
+static void kdn_dump_edge_flags(FILE *, int);
+static void kdn_dump_edge(FILE *, edge, bool, bool);
+static void kdn_dump_rtl_instruction_list(FILE *,
+ const_rtx, struct rtl_bb_info *);
+static void kdn_dump_gimple_instruction_list(FILE *,
+ struct gimple_bb_info gimple);
+static void kdn_dump_rtl_insns(FILE *, const_rtx);
+static void kdn_dump_gimple_insns(FILE *, gimple_seq);
+static void kdn_dump_loop_exits(FILE *stream, struct loop_exit *);
+static void kdn_dump_niter_desc(FILE *, struct niter_desc *);
+static void kdn_dump_dominator(FILE *, struct et_node *);
+static void kdn_treedump_children(FILE *, int, struct et_node *);
+static void kdn_dump_wide_int(FILE *stream, const widest_int &value);
+static void kdn_dump_all_blocks(FILE *stream, loop_p loop_ptr);
+
+static void kdn_dump_loop_id(FILE *stream, loop_p loop_ptr)
+{
+ if (loop_ptr == NULL)
+ fprintf(stream, "NULL\n");
+ else
+ fprintf(stream, " %d\n", loop_ptr->num);
+}
+
+static void kdn_dump_all_blocks(FILE *stream, loop_p loop_ptr) {
+ /* find some instruction contained within the loop */
+ if (loop_ptr->header == NULL) {
+ fprintf(stream,
+ "Could not dump all blocks because loop_ptr->header is NULL.\n");
+ } else if (loop_ptr->num_nodes <= 0) {
+ fprintf(stream,
+ "Could not dump all blocks because loop has no nodes.\n");
+ } else {
+ basic_block *body, a_block;
+ const_rtx the_head;
+ struct rtl_bb_info *the_rtl;
+ const rtx_insn *an_instruction;
+
+ body = get_loop_body(loop_ptr);
+ a_block = body[0];
+
+ the_head = a_block->il.x.head_;
+ the_rtl = a_block->il.x.rtl;
+
+ if (the_head != NULL) {
+ fprintf(stream,
+ "Dumping all basic blocks of the method:\n");
+
+ an_instruction = as_a <const rtx_insn *> (the_head);
+
+ /* walk through the list of instructions until you find the first
+ * insruction in the method. This is the one for which the previous
+ * index value is zero.
+ */
+ while (PREV_INSN(an_instruction) != NULL) {
+ an_instruction = PREV_INSN(an_instruction);
+ }
+
+ /* At this point, an_instruction points to first instruction
+ * of the method.
+ */
+ /* while our focus is not yet at the end of the method's instruction
+ * list:
+ * print out the contents of one basic block
+ * advance the focus to the instruction that follows the
+ * basic block just printed out.
+ */
+
+ while (an_instruction != NULL) {
+ int loop_is_done = FALSE;
+ int seen_one_basic_block_note = FALSE;
+ basic_block the_block;
+
+ fprintf(stream, "\nContents of one basic block:\n");
+ while ((an_instruction != NULL) && !loop_is_done) {
+ /*
+ * instead of printing out instructions one at a time,
+ * i'll just skip over the instructions of each basic block
+ * while remembering the basic block information.
+ *
+ * print_rtl_single(stream, an_instruction);
+ */
+
+ the_block = BLOCK_FOR_INSN(an_instruction);
+ if (NOTE_INSN_BASIC_BLOCK_P(an_instruction)) {
+ seen_one_basic_block_note = TRUE;
+ }
+ else {
+ switch (GET_CODE(an_instruction)) {
+
+ case JUMP_INSN:
+ loop_is_done = TRUE;
+ break;
+
+ case CALL_INSN: /* not sure if CALL_INSN terminates a bb */
+ /* this code was copied from cfgbuild.c */
+ /* Noreturn and sibling call instructions terminate the
+ * basic blocks (but only if they happen unconditionally).
+ */
+ if ((SIBLING_CALL_P (an_instruction)
+ || find_reg_note (an_instruction, REG_NORETURN, 0))
+ && GET_CODE (PATTERN (an_instruction)) != COND_EXEC) {
+ loop_is_done = TRUE;
+ break;
+ }
+
+ default: /* in other cases, do nothing */
+ break;
+ }
+ }
+
+ an_instruction = NEXT_INSN(an_instruction);
+
+ if (an_instruction != NULL) {
+ if (NOTE_INSN_BASIC_BLOCK_P(an_instruction)) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ } else if (GET_CODE(an_instruction) == CODE_LABEL) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ }
+ }
+ }
+ kdn_dump_block(stream, the_block);
+ fprintf(stream, "End of this basic block.\n");
+ }
+ fprintf(stream, "End of basic block's head:\n");
+ }
+ }
+}
+
+static void kdn_dump_loop(FILE *stream, loop_p loop_ptr) {
+
+ fprintf(stream, " %d\n", loop_ptr->num);
+ fprintf(stream, " number of instructions: %d\n", loop_ptr->ninsns);
+ fprintf(stream, " number of blocks within loop: %d\n", loop_ptr->num_nodes);
+ fprintf(stream, " average instructions per iteration: %d\n",
+ loop_ptr->av_ninsns);
+
+ if (loop_ptr->any_upper_bound) {
+ fprintf(stream,
+ " upper bound on number of times loop latch executes: ");
+ kdn_dump_wide_int(stream, loop_ptr->nb_iterations_upper_bound);
+ }
+
+ if (loop_ptr->any_estimate) {
+ fprintf(stream,
+ " estimate on number of times loop latch executes: ");
+ kdn_dump_wide_int(stream, loop_ptr->nb_iterations_estimate);
+ }
+
+ fprintf(stream, " loop can be parallel: %d\n", loop_ptr->can_be_parallel);
+ fprintf(stream, " warned of aggressive optimizations: %d\n",
+ loop_ptr->warned_aggressive_loop_optimizations);
+
+ fprintf(stream, " force_vectorize: %d, dont_vectorize: %d\n",
+ loop_ptr->force_vectorize, loop_ptr->dont_vectorize);
+
+ if (loop_ptr->inner != NULL) {
+ fprintf(stream, " inner_loop: ");
+ kdn_dump_loop_id(stream, loop_ptr->inner);
+ }
+
+ fprintf(stream, " outer_loops:\n");
+ if (loop_ptr->superloops) {
+ for (unsigned int i = 0; i < vec_safe_length(loop_ptr->superloops); i++) {
+ fprintf(stream, " ");
+ kdn_dump_loop_id(stream, (*loop_ptr->superloops)[i]);
+ }
+ }
+
+ fprintf(stream, " next loop is: ");
+ kdn_dump_loop_id(stream, loop_ptr->next);
+
+ fprintf(stream, " loop header: ");
+ kdn_dump_block_id(stream, loop_ptr->header);
+
+ fprintf(stream, " former header: ");
+ kdn_dump_block_id(stream, loop_ptr->former_header);
+
+ fprintf(stream, " loop latch: ");
+ kdn_dump_block_id(stream, loop_ptr->latch);
+
+ fprintf(stream, " loop exits: ");
+ kdn_dump_loop_exits(stream, loop_ptr->exits);
+
+ fprintf(stream, " loop iteration information\n");
+ kdn_dump_niter_desc(stream, loop_ptr->simple_loop_desc);
+
+#define DUMP_ENTIRE_BLOCK
+#ifdef DUMP_ENTIRE_BLOCK
+ kdn_dump_all_blocks(stream, loop_ptr);
+#endif
+
+ fprintf(stream,
+ "Not reported: aux, nb_iterations, safelen, estimate_state,");
+ fprintf(stream,
+ " simduid bounds, control_ivs, lpt_decision\n");
+}
+
+static void kdn_dump_block_id(FILE *stream, basic_block block) {
+ if (block == NULL)
+ fprintf(stream, "Null Basic Block\n");
+ else
+ fprintf(stream, "Basic Block No: %d\n", block->index);
+}
+
+/* Dump this block and all of its successor blocks transitively up
+ * to the num_levels distance from block.
+ */
+static void kdn_dump_block_flow(FILE *stream,
+ int num_levels, basic_block block) {
+ if (num_levels > 0) {
+ kdn_dump_block(stream, block);
+ for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) {
+ kdn_dump_block_flow(stream, num_levels - 1, EDGE_SUCC(block, i)->dest);
+ }
+ }
+}
+
+/* Note: basic_block is "struct basic_block_def *" */
+static void kdn_dump_block(FILE *stream, basic_block block) {
+ unsigned int i;
+
+ if (block == NULL)
+ fprintf(stream, "NULL block\n");
+ else {
+ fprintf(stream, "Basic block no: %d\n", block->index);
+
+ /* The discriminator for this block. The discriminator distinguishes
+ among several basic blocks that share a common locus, allowing for
+ more accurate sample-based profiling. */
+ fprintf(stream, " discriminator: %d\n", block->discriminator);
+
+ if (block->loop_father) {
+ fprintf(stream, " inner-most enclosing loop: ");
+ kdn_dump_loop_id(stream, block->loop_father);
+ }
+
+ /* enumerate from cfg-flags.def */
+ fprintf(stream, " flags: ");
+ kdn_dump_bb_flags(stream, block->flags);
+ fprintf(stream, " count: %ld\n", (long int) block->count);
+ fprintf(stream, " frequency: %d\n", block->frequency);
+
+ fprintf(stream, " Dominator information\n");
+ kdn_dump_dominator(stream, block->dom[0]);
+
+ fprintf(stream, " Post-dominator information\n");
+ kdn_dump_dominator(stream, block->dom[1]);
+
+ fprintf(stream, " Predecessor edges:\n");
+ for (i = 0; i < EDGE_COUNT(block->preds); i++) {
+ fprintf(stream, " predecessor[%d]:\n", i);
+ kdn_dump_edge(stream, EDGE_PRED(block, i), TRUE, FALSE);
+ }
+
+ fprintf(stream, " Successor edges:\n");
+ for (i = 0; i < EDGE_COUNT(block->succs); i++) {
+ fprintf(stream, " successor[%d]:\n", i);
+ kdn_dump_edge(stream, EDGE_SUCC(block, i), FALSE, TRUE);
+ }
+
+ fprintf(stream, " body of block:\n");
+ if (block->flags & BB_RTL)
+ kdn_dump_rtl_instruction_list(stream,
+ block->il.x.head_, block->il.x.rtl);
+ else
+ kdn_dump_gimple_instruction_list(stream, block->il.gimple);
+
+ fprintf(stream, "End of info for basic block no: %d\n", block->index);
+ }
+}
+
+static void kdn_dump_bb_flags(FILE *stream, int flags) {
+
+ /* see cfg-flags.def */
+ if (flags & BB_NEW)
+ fprintf(stream, "NEW ");
+ if (flags & BB_REACHABLE)
+ fprintf(stream, "REACHABLE ");
+ if (flags & BB_IRREDUCIBLE_LOOP)
+ fprintf(stream, "IRREDUCIBLE_LOOP ");
+ if (flags & BB_SUPERBLOCK)
+ fprintf(stream, "SUPERBLOCK ");
+ if (flags & BB_DISABLE_SCHEDULE)
+ fprintf(stream, "DISABLE_SCHEDULE ");
+ if (flags & BB_HOT_PARTITION)
+ fprintf(stream, "HOT_PARTITION ");
+ if (flags & BB_COLD_PARTITION)
+ fprintf(stream, "COLD_PARTITION ");
+ if (flags & BB_DUPLICATED)
+ fprintf(stream, "DUPLICATED ");
+ if (flags & BB_NON_LOCAL_GOTO_TARGET)
+ fprintf(stream, "NON_LOCAL_GOTO_TARGET ");
+ if (flags & BB_RTL)
+ fprintf(stream, "RTL ");
+ if (flags & BB_FORWARDER_BLOCK)
+ fprintf(stream, "FORWARDER_BLOCK ");
+ if (flags & BB_NONTHREADABLE_BLOCK)
+ fprintf(stream, "NONTHREADABLE_BLOCK ");
+ if (flags & BB_MODIFIED)
+ fprintf(stream, "MODIFIED ");
+ if (flags & BB_VISITED)
+ fprintf(stream, "VISITED ");
+ if (flags & BB_IN_TRANSACTION)
+ fprintf(stream, "IN_TRANSACTION ");
+
+ fprintf(stream, "\n");
+}
+
+static void kdn_dump_edge_flags(FILE *stream, int flags) {
+
+ /* see cfg-flags.def */
+ if (flags & EDGE_FALLTHRU)
+ fprintf(stream, "FALLTHRU ");
+ if (flags & EDGE_ABNORMAL)
+ fprintf(stream, "ABNORMAL ");
+ if (flags & EDGE_ABNORMAL_CALL)
+ fprintf(stream, "ABNORMAL_CALL ");
+ if (flags & EDGE_EH)
+ fprintf(stream, "EH ");
+ if (flags & EDGE_PRESERVE)
+ fprintf(stream, "PRESERVE ");
+ if (flags & EDGE_FAKE)
+ fprintf(stream, "FAKE ");
+ if (flags & EDGE_DFS_BACK)
+ fprintf(stream, "DFS_BACK ");
+ if (flags & EDGE_IRREDUCIBLE_LOOP)
+ fprintf(stream, "IRREDUCIBLE_LOOP ");
+ if (flags & EDGE_TRUE_VALUE)
+ fprintf(stream, "TRUE_VALUE ");
+ if (flags & EDGE_FALSE_VALUE)
+ fprintf(stream, "FALSE_VALUE ");
+ if (flags & EDGE_EXECUTABLE)
+ fprintf(stream, "EXECUTABLE ");
+ if (flags & EDGE_CROSSING)
+ fprintf(stream, "CROSSING ");
+ if (flags & EDGE_SIBCALL)
+ fprintf(stream, "SIBCALL ");
+ if (flags & EDGE_CAN_FALLTHRU)
+ fprintf(stream, "CAN_FALLTHRU ");
+ if (flags & EDGE_LOOP_EXIT)
+ fprintf(stream, "LOOP_EXIT ");
+ if (flags & EDGE_TM_UNINSTRUMENTED)
+ fprintf(stream, "TM_UNINSTRUMENTED ");
+ if (flags & EDGE_TM_ABORT)
+ fprintf(stream, "TM_ABORT ");
+ fprintf(stream, "\n");
+}
+
+static void kdn_dump_edge(FILE *stream, edge an_edge,
+ bool display_src, bool display_dest) {
+ /* note: an_edge is a pointer to struct edge_def */
+ if (an_edge == NULL) {
+ fprintf(stream, "Null Edge\n");
+ } else {
+ fprintf(stream,
+ "Edge (B%d->B%d)\n",
+ an_edge->src->index, an_edge->dest->index);
+ fprintf(stream, " flags: ");
+ kdn_dump_edge_flags(stream, an_edge->flags);
+ fprintf(stream, " probability: %d\n", an_edge->probability);
+ fprintf(stream, " count: %ld\n", (long int) an_edge->count);
+
+ /*
+ * trying to output got_locus information, but it's not working yet.
+ */
+ fprintf(stream, " goto locus: (file: %s) ",
+ LOCATION_FILE(an_edge->goto_locus));
+ fprintf(stream, "(line: %d) ", LOCATION_LINE(an_edge->goto_locus));
+ fprintf(stream, "(column: %d)\n", LOCATION_COLUMN(an_edge->goto_locus));
+
+ if (an_edge->flags & EDGE_EXECUTABLE) {
+ fprintf(stream, " GIMPLE instructions:\n");
+ kdn_dump_gimple_insns(stream, an_edge->insns.g);
+ fprintf(stream, " End of GIMPLE instructions\n");
+ }
+ else if (an_edge->insns.r != NULL) {
+ fprintf(stream, " RTL instructions:\n");
+ kdn_dump_rtl_insns(stream, an_edge->insns.r);
+ fprintf(stream, " End of RTL instructions\n");
+ }
+
+ fprintf(stream, " not reported: dest_idx, aux\n");
+ }
+}
+
+static void kdn_dump_rtl_instruction_list(FILE *stream,
+ const_rtx head,
+ struct rtl_bb_info *rtl) {
+ /* see rtl.h for definition of "class rtx_insn"
+ * supports operations such as
+ * INSN_UID(), NEXT_INSN(), PREV_INSN()
+ * Note that a const_rtx does not have a next field. const_rtx is
+ * the "super-class" of rtx_insn.
+ */
+ const_ins notes_and_jumptables;
+ struct pretty_printer buffer;
+
+ pp_needs_newline(&buffer) = true;
+ buffer.buffer->stream = stream;
+
+ if (rtl != NULL) {
+ int loop_is_done = FALSE;
+ int seen_one_basic_block_note = FALSE;
+
+ fprintf(stream, "Basic block's header:\n");
+ for (notes_and_jumptables = rtl->header_;
+ (notes_and_jumptables != NULL) && !loop_is_done; ) {
+
+ print_rtl_single(stream, notes_and_jumptables);
+
+ if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) {
+ seen_one_basic_block_note = TRUE;
+ }
+ else {
+
+ switch (GET_CODE(notes_and_jumptables)) {
+
+ case JUMP_INSN:
+ loop_is_done = TRUE;
+ break;
+
+ case CALL_INSN:
+ /* this code was copied from cfgbuild.c */
+ /* Noreturn and sibling call instructions terminate the basic blocks
+ (but only if they happen unconditionally). */
+ if ((SIBLING_CALL_P (notes_and_jumptables)
+ || find_reg_note (notes_and_jumptables, REG_NORETURN, 0))
+ && GET_CODE (PATTERN (notes_and_jumptables)) != COND_EXEC) {
+ loop_is_done = TRUE;
+ break;
+ }
+
+ default: /* in other cases, do nothing */
+ break;
+ }
+ }
+
+ notes_and_jumptables = NEXT_INSN(notes_and_jumptables);
+
+ if (notes_and_jumptables != NULL) {
+ if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ } else if (GET_CODE(notes_and_jumptables) == CODE_LABEL) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ }
+ }
+ }
+ fprintf(stream, "End of basic block's header\n");
+ }
+
+ fprintf(stream, "Basic block's head:\n");
+
+ if (head != NULL) {
+ /* note: print_rtl prints multiple basic blocks here. not sure
+ * whose fault this is.
+ */
+ int loop_is_done = FALSE;
+ int seen_one_basic_block_note = FALSE;
+
+#undef EXTRA_KDN_DEBUGGING
+#ifdef EXTRA_KDN_DEBUGGING
+ fprintf(stream,
+ "NOTE_INSN_BASIC_BLOCK: %d, JUMP_INSN: %d, CODE_LABEL: %d\n",
+ NOTE_INSN_BASIC_BLOCK, JUMP_INSN, CODE_LABEL);
+#endif
+
+ for (const rtx_insn *head_iterator = as_a <const rtx_insn *> (head);
+ (head_iterator != NULL) && !loop_is_done; ) {
+
+#ifdef EXTRA_KDN_DEBUGGING
+ fprintf(stream,
+ "printing single rtl, loop_is_done: %d, seen_one_bb: %d\n",
+ loop_is_done, seen_one_basic_block_note);
+ fprintf(stream,
+ " code of this insn is %d\n", GET_CODE(head_iterator));
+#endif
+
+ print_rtl_single(stream, head_iterator);
+
+ if (NOTE_INSN_BASIC_BLOCK_P(head_iterator)) {
+ seen_one_basic_block_note = TRUE;
+ }
+ else {
+ switch (GET_CODE(head_iterator)) {
+
+ case JUMP_INSN:
+ loop_is_done = TRUE;
+ break;
+
+ case CALL_INSN: /* not sure if CALL_INSN terminates a bb */
+ /* this code was copied from cfgbuild.c */
+ /* Noreturn and sibling call instructions terminate the basic blocks
+ (but only if they happen unconditionally). */
+ if ((SIBLING_CALL_P (head_iterator)
+ || find_reg_note (head_iterator, REG_NORETURN, 0))
+ && GET_CODE (PATTERN (head_iterator)) != COND_EXEC) {
+ loop_is_done = TRUE;
+ break;
+ }
+
+ default: /* in other cases, do nothing */
+ break;
+ }
+ }
+
+ head_iterator = NEXT_INSN(head_iterator);
+
+ if (head_iterator != NULL) {
+ if (NOTE_INSN_BASIC_BLOCK_P(head_iterator)) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ } else if (GET_CODE(head_iterator) == CODE_LABEL) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ }
+ }
+ }
+ }
+ fprintf(stream, "End of basic block's head:\n");
+
+ if (rtl != NULL) {
+ fprintf(stream, "Basic block's footer:\n");
+ int loop_is_done = FALSE;
+ int seen_one_basic_block_note = FALSE;
+
+ for (notes_and_jumptables = rtl->footer_;
+ (notes_and_jumptables != NULL) && !loop_is_done; ) {
+
+ print_rtl_single(stream, notes_and_jumptables);
+
+ if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) {
+ seen_one_basic_block_note = TRUE;
+ }
+ else {
+ switch (GET_CODE(notes_and_jumptables)) {
+
+ case JUMP_INSN:
+ loop_is_done = TRUE;
+ break;
+
+ case CALL_INSN:
+ /* this code was copied from cfgbuild.c */
+ /* Noreturn and sibling call instructions terminate the basic blocks
+ (but only if they happen unconditionally). */
+ if ((SIBLING_CALL_P (notes_and_jumptables)
+ || find_reg_note (notes_and_jumptables, REG_NORETURN, 0))
+ && GET_CODE (PATTERN (notes_and_jumptables)) != COND_EXEC) {
+ loop_is_done = TRUE;
+ break;
+ }
+
+ default: /* in other cases, do nothing */
+ break;
+ }
+ }
+
+ notes_and_jumptables = NEXT_INSN(notes_and_jumptables);
+
+ if (notes_and_jumptables != NULL) {
+ if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ } else if (GET_CODE(notes_and_jumptables) == CODE_LABEL) {
+ if (seen_one_basic_block_note) {
+ loop_is_done = TRUE;
+ }
+ }
+ }
+ }
+ fprintf(stream, "End of basic block's footer\n");
+ }
+}
+
+/* ignore warning message about gimple not used */
+static void kdn_dump_gimple_instruction_list(FILE *stream,
+ struct gimple_bb_info gimple) {
+ /* see gimple-pretty-print.c for relevant subroutines */
+ /* reference to visited silences warning about arg not used */
+ fprintf(stream,
+ "Not Implemented: dumping of gimple instructions, visited: %d\n",
+ gimple.seq->visited);
+}
+
+static void kdn_dump_rtl_insns(FILE *stream, const_rtx rtx_insns) {
+ if (rtx_insns != NULL) {
+ struct pretty_printer buffer;
+ pp_needs_newline(&buffer) = true;
+ buffer.buffer->stream = stream;
+
+ print_rtl(stream, rtx_insns);
+ }
+}
+
+static void kdn_dump_gimple_insns(FILE *stream, gimple_seq gimp_insns) {
+ /* see gimple-pretty-print.c for relevant subroutines */
+ /* reference to visited silences warning about arg not used */
+ fprintf(stream,
+ "Not Implemented: dumping of gimple instructions, visited: %d\n",
+ gimp_insns->visited);
+}
+
+static void kdn_dump_loop_exits(FILE *stream, struct loop_exit *exits) {
+ edge e;
+ loop_exit *walker = exits;
+
+ /* exits is as an infinite cycle and never equals null, apparently */
+ fprintf(stream, "Loop exits consist of edges: ");
+ do {
+ e = exits->e;
+ kdn_dump_edge(stream, e, TRUE, TRUE);
+ walker = walker->next;
+ } while (walker != exits);
+}
+
+static void kdn_dump_niter_desc(FILE *stream, struct niter_desc *loop_desc) {
+
+ if (loop_desc == NULL) {
+ fprintf(stream, "No iteration information for this loop\n");
+
+ } else {
+
+ fprintf(stream, " Edge that leaves loop: ");
+ kdn_dump_edge(stream, loop_desc->out_edge, TRUE, TRUE);
+
+ fprintf(stream, " Edge from same source that continues loop: ");
+ kdn_dump_edge(stream, loop_desc->in_edge, TRUE, TRUE);
+
+ fprintf(stream, " What do we know? %s\n",
+ (loop_desc->simple_p)? "something": "nothing");
+
+ fprintf(stream, " Is number of iterations constant? %s\n",
+ (loop_desc->const_iter)? "yes": "no");
+
+ if (loop_desc->const_iter) {
+ fprintf(stream, " Constant number of iterations: %ld\n",
+ (long) loop_desc->niter);
+ }
+
+ /*
+ fprintf(stream, " Number of iterations:\n");
+ kdn_dump_rtl_insns(stream, loop_desc->niter_expr);
+ fprintf(stream, "\n");
+ */
+ fprintf(stream,
+ " Not reported: assumptions, noloop_assumptions, infinite,");
+ fprintf(stream, " niter_expr, mode, signed_p\n");
+ }
+}
+
+
+static void kdn_dump_dominator(FILE *stream, struct et_node *dominator) {
+ basic_block a_block;
+
+ if (dominator == NULL) {
+ fprintf(stream,
+ " [Post?] Domination tree is empty\n");
+ } else {
+
+ fprintf(stream,
+ " [Post?] Domination tree [dfs_num_in: %d, dfs_num_out: %d]\n",
+ dominator->dfs_num_in, dominator->dfs_num_out);
+
+ /* Testing confirms that dominator->data points to a basic block */
+ a_block = (basic_block) dominator->data;
+
+ fprintf(stream, " ");
+ kdn_dump_block_id(stream, a_block);
+
+ if (dominator->son != NULL) {
+ kdn_treedump_children(stream, 8, dominator->son);
+ }
+ }
+}
+
+static void kdn_treedump_children(FILE *stream,
+ int indent_level, struct et_node *a_son) {
+ /* assert: a_son != NULL */
+ struct et_node *first_son = a_son;
+
+ do {
+ basic_block a_block = (basic_block) a_son->data;
+
+ fprintf(stream, "%*s", indent_level, ": ");
+ kdn_dump_block_id(stream, a_block);
+
+ if (a_son->son != NULL) {
+ kdn_treedump_children(stream, indent_level + 2, a_son->son);
+ }
+ a_son = a_son->left;
+ } while (a_son != first_son);
+}
+
+static void kdn_dump_wide_int(FILE *stream, const widest_int &orig_big_no) {
+ widest_int working_memory;
+ widest_int wide_zero, wide_ten;
+ int precision = orig_big_no.get_precision();
+ /* ISO C++ prohibits variable length array declarations, so I need a
+ * different way to allocate this array.
+ *
+ * char buffer[precision+2];
+ */
+ char *buffer = (char *) alloca(precision+2);
+ char *end_buffer = &buffer[precision+1];
+ bool negative = FALSE;
+
+ /* implicit invocation of widest_int constructors */
+ wide_zero = 0;
+ wide_ten = 10;
+
+ wi::copy(working_memory, orig_big_no);
+ if (wi::only_sign_bit_p(working_memory)) {
+ wi::neg(working_memory);
+ negative = TRUE;
+ }
+
+ *end_buffer-- = '\0';
+ for (int i = 0; i < precision; i++) {
+ widest_int remainder;
+ unsigned int length;
+ const HOST_WIDE_INT *value;
+
+ remainder = wi::smod_trunc(working_memory, wide_ten);
+ length = remainder.get_len();
+
+ /* value gets a pointer to the insides of the remainder object */
+ value = remainder.get_val();
+
+ /* kelvin says: i think i want to assert that length > 0 */
+ if (length > 0) {
+ *end_buffer-- = '0' + value[length-1];
+ }
+
+ /* divide working_memory by ten, since we've captured the least
+ * significant bit already.
+ */
+ working_memory = wi::sdiv_trunc(working_memory, wide_ten);
+
+ if (wi::cmps(working_memory, wide_zero) == 0) {
+ break;
+ }
+ }
+
+ if (negative) {
+ *end_buffer-- = '-';
+ }
+ end_buffer++;
+ fprintf(stream, "[kdn big number] %s\n", end_buffer);
+}
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 913bd3c2614..d081f969eca 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -45,6 +45,13 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "dumpfile.h"
+#define KELVIN_PATCH
+#define KELVIN_NOISE
+#ifdef KELVIN_NOISE
+#include "kelvin-debugs.c"
+#endif
+
+
/* This pass performs loop unrolling. We only perform this
optimization on innermost loops (with single exception) because
the impact on performance is greatest here, and we want to avoid
@@ -479,6 +486,26 @@ unroll_loop_constant_iterations (struct loop *loop)
struct opt_info *opt_info = NULL;
bool ok;
+#ifdef KELVIN_NOISE
+ basic_block *body;
+ body = get_loop_body (loop);
+ fprintf(stderr, "At top of unroll_loop_constant_iterations() with loop:");
+ kdn_dump_loop(stderr, loop);
+ fprintf(stderr, " this loop is comprised of %d basic blocks\n",
+ loop->num_nodes);
+
+ /* kdn: body is a newly allocated (XNEWVEC) array of basic_block
+ * references. Note that basic_block is "struct basic_block_def *".
+ * The number of values held in the body array is given by
+ * loop->num_nodes.
+ */
+ for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) {
+ fprintf(stderr, "body block[%d]:\n", _i_);
+ kdn_dump_block(stderr, body[_i_]);
+ }
+ free(body);
+#endif
+
niter = desc->niter;
/* Should not get here (such loop should be peeled instead). */
@@ -634,6 +661,27 @@ unroll_loop_constant_iterations (struct loop *loop)
FOR_EACH_VEC_ELT (remove_edges, i, e)
remove_path (e);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "At bottom of unroll_loop_constant_iterations(), ");
+ fprintf(stderr, "having unrolled %d times\n", max_unroll);
+ fprintf(stderr, " Loop has %d instructions, ", num_loop_insns(loop));
+ fprintf(stderr, " distributed across %d basic blocks.\n", loop->num_nodes);
+ kdn_dump_loop(stderr, loop);
+
+ body = get_loop_body(loop);
+
+ /* kdn: body is a newly allocated (XNEWVEC) array of basic_block
+ * references. Note that basic_block is "struct basic_block_def *".
+ * The number of values held in the body array is given by
+ * loop->num_nodes.
+ */
+ for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) {
+ fprintf(stderr, "body block[%d]:\n", _i_);
+ kdn_dump_block(stderr, body[_i_]);
+ }
+ free(body);
+#endif
+
if (dump_file)
fprintf (dump_file,
";; Unrolled loop %d times, constant # of iterations %i insns\n",
@@ -876,6 +924,26 @@ unroll_loop_runtime_iterations (struct loop *loop)
auto_vec<basic_block> dom_bbs;
body = get_loop_body (loop);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "At top of unroll_loop_runtime_iterations() with loop:");
+ kdn_dump_loop(stderr, loop);
+ fprintf(stderr, " this loop is comprised of %d basic blocks\n",
+ loop->num_nodes);
+ /*
+ fprintf(stderr, " loop desc is represented by:\n");
+ kdn_dump_niter_desc(stderr, desc);
+ */
+ /* kdn: body is a newly allocated (XNEWVEC) array of basic_block
+ * references. Note that basic_block is "struct basic_block_def *".
+ * The number of values held in the body array is given by
+ * loop->num_nodes.
+ */
+ for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) {
+ fprintf(stderr, "body block[%d]:\n", _i_);
+ kdn_dump_block(stderr, body[_i_]);
+ }
+#endif
+
for (i = 0; i < loop->num_nodes; i++)
{
vec<basic_block> ldom;
@@ -929,6 +997,10 @@ unroll_loop_runtime_iterations (struct loop *loop)
/* Precondition the loop. */
split_edge_and_insert (loop_preheader_edge (loop), init_code);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "After preconditioning the loop, the loop looks like:\n");
+ kdn_dump_loop(stderr, loop);
+#endif
auto_vec<edge> remove_edges;
@@ -943,32 +1015,170 @@ unroll_loop_runtime_iterations (struct loop *loop)
&& !desc->noloop_assumptions)
bitmap_set_bit (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
+
+#ifdef KELVIN_NOISE
+ /* extra zero check switch (ezc) processing */
+ fprintf(stderr,
+ "ezc_switch loop preheader before calling duplicate_loop...\n");
+ kdn_dump_block(stderr, ezc_swtch);
+#endif
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
gcc_assert (ok);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "After duplicating loop to header edge, the loop is\n");
+ kdn_dump_loop(stderr, loop);
+#endif
/* Record the place where switch will be built for preconditioning. */
swtch = split_edge (loop_preheader_edge (loop));
+#ifdef KELVIN_PATCH
+ /* kelvin: next three lines inserted by PTH */
+ int iter_freq, new_freq;
+ iter_freq = new_freq = swtch->frequency / (n_peel+1);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "\n Before overwriting, swtch->frequency is %d\n",
+ swtch->frequency);
+ /* Overwriting swtch->frequency below is what causes B11 to have the
+ * correct frequency. But I have to scale the frequencies for each of
+ * the loop's blocks that are copied as well.
+ */
+#endif
+ swtch->frequency = new_freq;
+#endif
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "\n n_peel is %d, iter_freq is %d\n", n_peel, iter_freq);
+ fprintf(stderr, " The newly created swtch block is given by:\n");
+ kdn_dump_block(stderr, swtch);
+#endif
+
for (i = 0; i < n_peel; i++)
{
/* Peel the copy. */
bitmap_clear (wont_exit);
if (i != n_peel - 1 || !last_may_exit)
bitmap_set_bit (wont_exit, 1);
+#ifdef KELVIN_PATCH
+ /* kelvin: up to ok below is all new */
+ // PTH - Need to adjust freq of pred block before duplicating loop
+ // preheader->src didn't work (didn't see any change)
+ // preheader->dest didn't work (peeled blocks had freq = 0)
+ // preheader->dest + not passing FLAG_UPDATE_FREQ fixed peeled copies
+ // but screwed up loop block bb5=558 (bb8(exit,
+ // due to bb5 freq) and bb23(freq=7536))
+ // kelvin says PTH had commented out the following line,
+ // apparently because it creates some problems with frequency of
+ // bb5.
+ // loop_preheader_edge (loop)->dest->frequency = new_freq;
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "preparing to invoke duplicate_loop_to_header_edge, new_freq: %d\n",
+ new_freq);
+ fprintf(stderr, "loop_preheader_edge(loop) is:\n");
+ kdn_dump_edge(stderr, loop_preheader_edge(loop), TRUE, TRUE);
+#endif
+
+ /* kelvin patch begins here */
+ int saved_header_frequency = loop->header->frequency;
+ int saved_latch_frequency = loop->latch->frequency;
+ loop->header->frequency /= (n_peel + 1);
+ loop->latch->frequency /= (n_peel + 1);
+
+ loop->header->frequency *= (i + 1);
+ loop->latch->frequency *= (i + 1);
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Temporarily setting header and latch frequencies to %d and %d\n",
+ loop->header->frequency, loop->latch->frequency);
+#endif
+ /* kelvin patch ends here */
+#endif
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1, wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
+#ifdef KELVIN_PATCH
+ loop->header->frequency = saved_header_frequency;
+ loop->latch->frequency = saved_latch_frequency;
+ fprintf(stderr, "Restoring header and latch frequencies to %d and %d\n",
+ loop->header->frequency, loop->latch->frequency);
+#endif
gcc_assert (ok);
/* Create item for switch. */
j = n_peel - i - (extra_zero_check ? 0 : 1);
p = REG_BR_PROB_BASE / (i + 2);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Before splitting the next edge, loop_preaheader_edge is:\n");
+ kdn_dump_edge(stderr, loop_preheader_edge (loop), TRUE, TRUE);
+#endif
preheader = split_edge (loop_preheader_edge (loop));
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "In peel loop, i: %d, j: %d, preheader is:\n", i, j);
+ kdn_dump_block(stderr, preheader);
+ /* note that the preheader has the correct frequency, but the
+ * nodes created by duplicate_loop_to_header_edge have an incorrect
+ * frequency.
+ *
+ * Kelvin questions: how does preheader get its frequency? It
+ * is copied from the original loop, and then overwritten below.
+ */
+ fprintf(stderr, "Refined loop definition is:\n");
+ kdn_dump_loop(stderr, loop);
+
+ /* kelvin believes the following code is responsible for assigning
+ * frequencies on B14, B18, and B22.
+ */
+ fprintf(stderr, "Processing preheader block: ");
+ kdn_dump_block_id(stderr, preheader);
+ fprintf(stderr,
+ "Before overwriting the preheader, frequency is %d, ",
+ preheader->frequency);
+ fprintf(stderr, "to be overwritten with %d (or not!)*\n",
+ new_freq + iter_freq);
+
+ /*
+ * note that each time we peel another loop iteration, we
+ * increment new_freq by the value of iter_freq, so the
+ * first iteration has value 186, the second 372, the third,
+ * 558, an so on.
+ */
+
+#endif
+#ifdef KELVIN_PATCH
+ /* Pat Haugen's patch below is not quite correct. In the
+ * case that the duplicated blocks include an "exit" which
+ * circumvents execution of the loop, we need to subtract
+ * from preheader->frequency the cumulative frequencies
+ * associated with the duplicated "loop" exit edges.
+ */
+
+
+ /* kelvin: the following two lines constitute a patch provided
+ * by Pat Haugen. This works most of the time, but it doesn't
+ * work in the case that the copied blocks include conditional
+ * branches that circumvent execution of the loop that follows.
+ * To do this right, I need to calculate the frequency of this
+ * preheader in a different way. Basically, I need to calculate
+ * the preheader frequency by summing frequences of predecessor
+ * nodes, AFTER we create and establish the frequencies for the
+ * new swtch block.
+ *
+ * new_freq = new_freq + iter_freq;
+ * preheader->frequency = new_freq;
+ */
+
+ /* kelvin is suspicious of the code above, which is calculating
+ * the preheader->frequency.
+ */
+#endif
branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
block_label (preheader), p,
NULL);
@@ -978,13 +1188,58 @@ unroll_loop_runtime_iterations (struct loop *loop)
gcc_assert (branch_code != NULL_RTX);
swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "The newly created swtch is:\n");
+ kdn_dump_block(stderr, swtch);
+#endif
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
+#ifdef KELVIN_PATCH
+ /* kelvin: following line modified by pth, was single_pred_edge */
+ single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
+#else
single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
+#endif
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
e->probability = p;
+
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "The newly created edge is:\n");
+ kdn_dump_edge(stderr, e, TRUE, TRUE);
+#endif
+#ifdef KELVIN_PATCH
+ /* kelvin: following line inserted by PTH is replaced with
+ * the alternative sequence of instructions crafted by kelvin.
+ * swtch->frequency = new_freq;
+ */
+ new_freq = new_freq + iter_freq;
+ swtch->frequency = new_freq;
+
+ int prehead_frequency = 0;
+ for (unsigned int i = 0; i < EDGE_COUNT(preheader->preds); i++) {
+ edge an_edge = EDGE_PRED(preheader, i);
+ int the_edge_frequency = EDGE_FREQUENCY(an_edge);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Summing predecessor edge: ");
+ kdn_dump_edge(stderr, an_edge, TRUE, TRUE);
+ fprintf(stderr, " which has frequency %d\n", the_edge_frequency);
+#endif
+ prehead_frequency += the_edge_frequency;
+ }
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Overwriting preheader block ");
+ kdn_dump_block_id(stderr, preheader);
+ fprintf(stderr, " with frequency %d\n", prehead_frequency);
+#endif
+ preheader->frequency = prehead_frequency;
+
+#endif
}
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "\nAt midpoint of unroll_loop_runtime_iterations() with loop:");
+ kdn_dump_loop(stderr, loop);
+#endif
if (extra_zero_check)
{
@@ -1015,16 +1270,41 @@ unroll_loop_runtime_iterations (struct loop *loop)
bitmap_clear_bit (wont_exit, may_exit_copy);
opt_info_start_duplication (opt_info);
+#ifdef KELVIN_NOISE
+ fprintf(stderr,
+ "Invoking duplicate_loop_to_header_edge for unrolling loop\n");
+#endif
+#ifdef KELVIN_PATCH
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
max_unroll,
wont_exit, desc->out_edge,
&remove_edges,
+/* kelvin: following code commented out and replaced by PTH
DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
: 0));
+*/ opt_info
+ ? DLTHE_RECORD_COPY_NUMBER
+ : 0);
+#else
+ ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
+ max_unroll,
+ wont_exit, desc->out_edge,
+ &remove_edges,
+ DLTHE_FLAG_UPDATE_FREQ
+ | (opt_info
+ ? DLTHE_RECORD_COPY_NUMBER
+ : 0));
+#endif
gcc_assert (ok);
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "After duplicating loop to header edge for the loop body\n");
+ fprintf(stderr, " exit_at_end is %d\n", exit_at_end);
+ fprintf(stderr, " Kelvin is wondering when we remove orig out edge\n");
+ kdn_dump_all_blocks(stderr, loop);
+#endif
if (opt_info)
{
apply_opt_in_copies (opt_info, max_unroll, true, true);
@@ -1053,7 +1333,17 @@ unroll_loop_runtime_iterations (struct loop *loop)
/* Remove the edges. */
FOR_EACH_VEC_ELT (remove_edges, i, e)
+#ifdef KELVIN_PATCH
+ {
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "Removing an edge from the loop\n");
+ kdn_dump_edge(stderr, e, TRUE, TRUE);
+#endif
+ remove_path (e);
+ }
+#else
remove_path (e);
+#endif
/* We must be careful when updating the number of iterations due to
preconditioning and the fact that the value must be valid at entry
@@ -1081,6 +1371,27 @@ unroll_loop_runtime_iterations (struct loop *loop)
loop->any_estimate = false;
}
+#ifdef KELVIN_NOISE
+ fprintf(stderr, "\nAt bottom of unroll_loop_runtime_iterations(), ");
+ fprintf(stderr, "having unrolled %d times\n", max_unroll);
+ fprintf(stderr, " Loop has %d instructions, ", num_loop_insns(loop));
+ fprintf(stderr, " distributed across %d basic blocks.\n", loop->num_nodes);
+ kdn_dump_loop(stderr, loop);
+
+ body = get_loop_body(loop);
+
+ /* kdn: body is a newly allocated (XNEWVEC) array of basic_block
+ * references. Note that basic_block is "struct basic_block_def *".
+ * The number of values held in the body array is given by
+ * loop->num_nodes.
+ */
+ for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) {
+ fprintf(stderr, "body block[%d]:\n", _i_);
+ kdn_dump_block(stderr, body[_i_]);
+ }
+ free(body);
+#endif
+
if (dump_file)
fprintf (dump_file,
";; Unrolled loop %d times, counting # of iterations "