diff options
author | kelvin <kelvin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-23 19:54:53 +0000 |
---|---|---|
committer | kelvin <kelvin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-23 19:54:53 +0000 |
commit | 3e068742a36a4c1e426abb1bf51531dbdc5c279a (patch) | |
tree | 391d3b681ce0c1cc28a08c64006cb1fe067654eb | |
parent | 418b6d85e22af51e3291e0d81d4e3e1c340a6b57 (diff) |
snapshot of code that works for first test case
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ibm/kelvin-1@229269 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/cfgloopmanip.c | 936 | ||||
-rw-r--r-- | gcc/kelvin-debugs.c | 801 | ||||
-rw-r--r-- | gcc/loop-unroll.c | 311 |
3 files changed, 2036 insertions, 12 deletions
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 1f9a2b38fe7..2c9c8d15e1e 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -34,6 +34,12 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-manip.h" #include "dumpfile.h" +#define KELVIN_PATCH +#define KELVIN_NOISE +#ifdef KELVIN_NOISE +#include "kelvin-debugs.c" +#endif + static void copy_loops_to (struct loop **, int, struct loop *); static void loop_redirect_edge (edge, basic_block); @@ -44,6 +50,532 @@ static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *, bitmap); +#ifdef KELVIN_PATCH + +/** + * Issue a fatal error message and abort program execution. + */ +static void internal(const char *msg) +{ + fprintf(stderr, "Fatal internal error: %s\n", msg); + exit(-1); +} + +/* + * Return true iff block is considered to reside within the loop + * represented by loop_ptr + */ +static bool +in_loop_p(basic_block block, loop_p loop_ptr) +{ + basic_block *bbs = get_loop_body (loop_ptr); + bool result = false; + + for (unsigned int i = 0; i < loop_ptr->num_nodes; i++) + { + if (bbs[i] == block) + result = true; + } + free (bbs); + return result; +} + + +/* + * Zero all frequencies associated with this loop. + */ +static void +zero_loop_frequencies(loop_p loop_ptr) +{ + basic_block *bbs = get_loop_body (loop_ptr); + for (unsigned i = 0; i < loop_ptr->num_nodes; ++i) + { +#ifdef KELVIN_NOISE + fprintf(stderr, "zero_loop_frequencies: "); + kdn_dump_block_id(stderr, bbs[i]); +#endif + bbs[i]->frequency = 0; + } + free (bbs); +} + +typedef struct block_ladder_rung { + basic_block block; + struct block_ladder_rung *lower_rung; +} *ladder_rung_p; + + +/* I'm not sure the same logical edge will always be represented + * by the same edge data object. So we will test equality by + * comparing source and dest values. + */ +static bool same_edge(edge an_edge, edge another_edge) +{ + return ((an_edge->src == another_edge->src) && + (an_edge->dest == another_edge->dest)); +} + +/* Return true iff an_edge matches one of the nodes that is already + * present within set_of_edges. + */ +static bool in_edge_set(edge an_edge, vec<edge> set_of_edges) +{ + unsigned int j; + edge e; + + FOR_EACH_VEC_ELT(set_of_edges, j, e) + { + if (same_edge(e, an_edge)) + return true; + } + return false; +} + +/* return true iff an_edge->dest is already represented within + * the ladder_rung. + */ +static bool in_call_chain(edge an_edge, ladder_rung_p ladder_rung) +{ + if (ladder_rung == NULL) + return false; + else if (an_edge->dest == ladder_rung->block) + return true; + else + return in_call_chain(an_edge, ladder_rung->lower_rung); +} + + +static void +recursively_zero_frequency(loop_p loop_ptr, vec<edge> exit_edges, + ladder_rung_p ladder_rung, + edge incoming_edge) +{ +#ifdef KELVIN_NOISE + basic_block a_block = incoming_edge->dest; + fprintf(stderr, "recursively zero loop frequency for block %d, from %d\n", + a_block->index, a_block->frequency); +#endif + + if (incoming_edge->dest == loop_ptr->header) + return; + else if (in_edge_set(incoming_edge, exit_edges)) + return; + else if (in_call_chain(incoming_edge, ladder_rung)) + return; + else { + struct block_ladder_rung a_rung; + basic_block block = incoming_edge->dest; + +#ifdef KELVIN_NOISE + fprintf(stderr, " ---> actually doing the zentrification\n"); +#endif + a_rung.block = block; + a_rung.lower_rung = ladder_rung; + block->frequency = 0; + for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) + { + edge successor = EDGE_SUCC(block, i); + recursively_zero_frequency(loop_ptr, exit_edges, + &a_rung, successor); + } + } +} + +/* Return true iff candidate is contained within the loop represented + * by loop_header and loop_latch. + * + * We consider the block to be within the loop if there exists a path + * within the control flow graph from this node to the loop's latch + * which does not pass through the loop's header. (If all paths to + * the latch pass through the loop header, then the node is contained + * within an outer-nested loop but not within this loop.) + * + * Note that if a candidate's success is the loop, then the candidate + * itself is also in the loop. If none of the successors are in the loop + */ +static bool _in_loop(basic_block candidate, + basic_block loop_header, + basic_block loop_latch, + bool start_of_recursion) +{ + if (candidate == loop_latch) { + return true; + } else if (candidate == loop_header) { + return start_of_recursion; + } else { + for (unsigned int i = 0; i < EDGE_COUNT(candidate->succs); i++) { + basic_block successor = EDGE_SUCC(candidate, i)->dest; + if (_in_loop(successor, loop_header, loop_latch, false)) + return true; + } + return false; /* None of the successors was in loop */ + } +} + +static bool _in_loop_set(basic_block candidate, vec<basic_block> loop_set) +{ + unsigned int j; + basic_block b; + + FOR_EACH_VEC_ELT(loop_set, j, b) { + if (b == candidate) { + return true; + } + } + return false; +} + +/* Add candidate into the results array at position count if candidate + * is in the loop and it is not already contained within the results + * array. + * + * We consider the block to be within the loop if there exists a path + * within the control flow graph from this node to the loop's latch + * which does not pass through the loop's header. (If all paths to + * the latch pass through the loop header, then the node is contained + * within an outer-nested loop but not within this loop.) + * + * If and only if candidate is added to the results array, recursively + * do the same for each successor of candidate starting at the following + * position (count + 1) within the array. + * + * Abort with an internal error message if the number of blocks to be + * added into the results array exceeds the array's size. + * + * return the number of nodes stored within the results array + */ +static vec<basic_block> _recursively_get_loop_blocks(basic_block candidate, + vec<basic_block> results, + basic_block loop_header, + basic_block loop_latch) +{ + basic_block bb; + unsigned int u; + + /* if candidate is already in the results array, then we're done */ + FOR_EACH_VEC_ELT(results, u, bb) { + if (bb == candidate) + return results; + } + + if (_in_loop(candidate, loop_header, loop_latch, true)) { + results.safe_push(candidate); + for (unsigned int u = 0; u < EDGE_COUNT(candidate->succs); u++) { + edge successor = EDGE_SUCC(candidate, u); + if (successor->probability != 0) { + results = _recursively_get_loop_blocks(successor->dest, results, + loop_header, loop_latch); + } + } + } + + return results; +} + + +static vec<basic_block> _get_loop_blocks(loop_p loop_ptr) +{ + vec<basic_block> results; + + results = vNULL; + results = _recursively_get_loop_blocks(loop_ptr->header, results, + loop_ptr->header, loop_ptr->latch); + return results; +} + +static bool _in_block_set(basic_block block, vec<basic_block> block_set) +{ + basic_block bb; + unsigned int u; + FOR_EACH_VEC_ELT(block_set, u, bb) { + if (bb == block) + return true; + } + return false; +} + +static vec<edge> _get_loop_exit_edges(vec<basic_block> loop_blocks) { + basic_block bb; + unsigned int u; + edge e; + vec<edge> results = vNULL; + + FOR_EACH_VEC_ELT(loop_blocks, u, bb) { + for (unsigned int i = 0; i < EDGE_COUNT(bb->succs); i++) { + edge successor = EDGE_SUCC(bb, i); + basic_block edge_dest = successor->dest; + + if (!_in_block_set(edge_dest, loop_blocks)) { + results.safe_push(successor); + } + } + } +#ifdef KELVIN_NOISE + fprintf(stderr, "_get_loop_exit_edges() is returning this set of edges\n"); + FOR_EACH_VEC_ELT(results, u, e) { + kdn_dump_edge(stderr, e, true, true); + } + fprintf(stderr, " end of edge set\n"); +#endif + return results; +} + +/* + * Zero all frequencies for all blocks contained within the loop + * represented by loop_ptr which are reachable from block without + * passing through the block header. If block is not within the loop, + * this has no effect. The behavior is as outlined by the following + * algorithm: + * + * If block is contained within loop: + * Set block's frequency to zero + * Using a depth-first traversal, do the same for each successor + * transitively, stopping the recursive traversal if: + * the current block is the loop header, or + * the current block resides outside the loop, or + * the current block has already been visited in this depth-first + * traversal. + */ +static void +zero_partial_loop_frequencies(loop_p loop_ptr, basic_block block) +{ + /* When zero_partial_loop_frequencies is invoked, the *loop_ptr + * object is not entirely coherent, so existing library services + * get_loop_blocks() and get_loop_exit_edges() cannot be called + * from this context. Instead, we use the _get_loop_blocks() + * and _get_loop_exit_edges() functions which assume only + * the validity of loop_ptr->loop_header, loop_ptr->loop_latch, + * and valid successor and predecessor information for each + * block contained within the loop. + */ + vec<basic_block> loop_blocks = _get_loop_blocks(loop_ptr); + if (_in_block_set(block, loop_blocks)) { + struct block_ladder_rung ladder_rung; + ladder_rung.block = block; + ladder_rung.lower_rung = NULL; + basic_block header; + + vec<edge> exit_edges = _get_loop_exit_edges(loop_blocks); +#ifdef KELVIN_NOISE + fprintf(stderr, + "zeroing loop frequency for block %d, from %d\n", + block->index, block->frequency); +#endif + block->frequency = 0; + for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) { + edge successor = EDGE_SUCC(block, i); + if (successor->probability != 0) { + recursively_zero_frequency(loop_ptr, exit_edges, + &ladder_rung, successor); + } + } + exit_edges.release(); + } + loop_blocks.release(); +} + +static void +recursively_increment_frequency(loop_p loop_ptr, vec<edge> exit_edges, + ladder_rung_p ladder_rung, + edge incoming_edge, + int frequency_increment) +{ +#ifdef KELVIN_NOISE + basic_block a_block = incoming_edge->dest; + fprintf(stderr, "recursively incrementing loop frequency for block %d, from %d by %d\n", + a_block->index, a_block->frequency, frequency_increment); +#endif + + if (incoming_edge->dest == loop_ptr->header) + return; + else if (in_edge_set(incoming_edge, exit_edges)) + return; + else if (in_call_chain(incoming_edge, ladder_rung)) + return; + /* right here, I think I need a short-circuit evaluator to return if + a_block is not inside the loop + + so why didn't my test for exit_edges membership work? + + in the trace, in_block_set(7) fails, but then I acdtually do an + increment for block 7. + + */ + else { + struct block_ladder_rung a_rung; + basic_block block = incoming_edge->dest; + +#ifdef KELVIN_NOISE + fprintf(stderr, " ---> actually doing the increment\n"); +#endif + a_rung.block = block; + a_rung.lower_rung = ladder_rung; + block->frequency += frequency_increment; + for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) { + edge successor = EDGE_SUCC(block, i); + int successor_increment = + (frequency_increment * successor->probability) / REG_BR_PROB_BASE; + + recursively_increment_frequency(loop_ptr, exit_edges, + &a_rung, successor, + successor_increment); + } + } +} + +/* + * If block is contained within loop, we do the following: + * Add incremental_frequency (which may be negative) to + * block->frequency and propogate this change to all successors of + * block that reside within the loop, transitively. Use a depth-first + * tree traversal, stopping the recursion at the loop header, at any + * successor block that resides outside the loop, and at any block + * that is already part of the current depth-first traversal. + */ +static void increment_loop_frequencies(loop_p loop_ptr, + basic_block block, + int frequency_increment) +{ + + vec<basic_block> loop_blocks = _get_loop_blocks(loop_ptr); + +#ifdef KELVIN_NOISE + { + unsigned int j; + basic_block b; + fprintf(stderr, "increment_loop_frequencies block %d with increment: %d\n", + block->index, frequency_increment); + fprintf(stderr, " the loop blocks consist of:\n"); + FOR_EACH_VEC_ELT(loop_blocks, j, b) { + fprintf(stderr, " block %d\n", b->index); + } + } +#endif + if (_in_loop_set(block, loop_blocks)) { + struct block_ladder_rung ladder_rung; + ladder_rung.block = block; + ladder_rung.lower_rung = NULL; + basic_block header; + + vec<edge> exit_edges = _get_loop_exit_edges(loop_blocks); +#ifdef KELVIN_NOISE + fprintf(stderr, + "incrementing loop frequency for block %d, from %d by %d\n", + block->index, block->frequency, frequency_increment); +#endif + block->frequency += frequency_increment; + for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) { + edge successor = EDGE_SUCC(block, i); + if (successor->probability != 0) { + int successor_increment = + ((frequency_increment * successor->probability) / + REG_BR_PROB_BASE); + + recursively_increment_frequency(loop_ptr, exit_edges, + &ladder_rung, successor, + successor_increment); + } + } + exit_edges.release(); + } +#ifdef KELVIN_NOISE + else { + fprintf(stderr, " block %d was not in the loop set!\n", block->index); + } +#endif + loop_blocks.release(); +} + +/* + * check_loop_frequency_integrity enforces that: + * + * a. The sum of outgoing edge frequencies for the loop equals the + * sum of incoming edge frequencies for the loop header block. + * + * b. The sum of predecessor edge frequencies for every block + * in the loop equals the frequency of that block. + */ +static void check_loop_frequency_integrity(loop_p loop_ptr) +{ + unsigned int i, j, k; + basic_block a_block; + + vec<basic_block> loop_body = _get_loop_blocks(loop_ptr); + basic_block header; + + FOR_EACH_VEC_ELT(loop_body, k, a_block) { + int delta; + int predecessor_frequencies = 0; + +#ifdef KELVIN_NOISE + fprintf(stderr, + "Enforcing pred frequency coherence for " + "block %d, with freq: %d\n", + a_block->index, a_block->frequency); +#endif + for (j = 0; j < EDGE_COUNT(a_block->preds); j++) { + edge a_predecessor = EDGE_PRED(a_block, j); +#ifdef KELVIN_NOISE + fprintf(stderr, " predecessor "); + kdn_dump_edge(stderr, a_predecessor, true, true); + fprintf(stderr, + " which has frequency %d\n", EDGE_FREQUENCY(a_predecessor)); +#endif + predecessor_frequencies += EDGE_FREQUENCY(a_predecessor); + } + delta = predecessor_frequencies - a_block->frequency; + if (delta < 0) + delta = -delta; + + if (delta > 10) { + internal("predecessor frequencies confused while unrolling loop"); + } + } + +#ifdef KELVIN_NOISE + fprintf(stderr, "Comparing loop incoming and outgoing frequencies\n"); +#endif + header = loop_ptr->header; + int incoming_frequency = 0; + for (i = 0; i < EDGE_COUNT(header->preds); i++) { + edge a_predecessor = EDGE_PRED(header, i); + + if (!_in_loop_set (a_predecessor->src, loop_body)) { +#ifdef KELVIN_NOISE + fprintf(stderr, "Enforcing coherence, loop predecessor: "); + kdn_dump_edge(stderr, a_predecessor, true, true); + fprintf(stderr, " which has frequency: %d\n", + EDGE_FREQUENCY(a_predecessor)); +#endif + incoming_frequency += EDGE_FREQUENCY(a_predecessor); + } + } + + int outgoing_frequency = 0; + vec<edge> exit_edges = _get_loop_exit_edges(loop_body); + edge edge; + FOR_EACH_VEC_ELT (exit_edges, i, edge) { +#ifdef KELVIN_NOISE + fprintf(stderr, "Enforcing coherence, loop exit: "); + kdn_dump_edge(stderr, edge, true, true); + fprintf(stderr, " which has frequency: %d\n", EDGE_FREQUENCY(edge)); +#endif + outgoing_frequency += EDGE_FREQUENCY(edge); + } + + int delta = incoming_frequency - outgoing_frequency; + if (delta < 0) + delta = -delta; + + if (delta > 10) { + internal("Incoherent frequencies while unrolling loop"); + } + loop_body.release(); + exit_edges.release(); +} + +#endif + /* Checks whether basic block BB is dominated by DATA. */ static bool rpe_enum_p (const_basic_block bb, const void *data) @@ -1101,7 +1633,11 @@ can_duplicate_loop_p (const struct loop *loop) is redistributed evenly to the remaining edges coming from E->src. */ static void +#ifdef KELVIN_PATCH +set_zero_probability (loop_p loop_ptr, edge e) +#else set_zero_probability (edge e) +#endif { basic_block bb = e->src; edge_iterator ei; @@ -1109,6 +1645,12 @@ set_zero_probability (edge e) unsigned n = EDGE_COUNT (bb->succs); gcov_type cnt = e->count, cnt1; unsigned prob = e->probability, prob1; +#ifdef KELVIN_PATCH + int original_edge_frequency; + int new_edge_frequency; + int change_in_edge_frequency; + bool edge_originates_in_loop = in_loop_p (bb, loop_ptr); +#endif gcc_assert (n > 1); cnt1 = cnt / (n - 1); @@ -1118,18 +1660,78 @@ set_zero_probability (edge e) { if (ae == e) continue; - + +#ifdef KELVIN_PATCH + if (edge_originates_in_loop) + { + original_edge_frequency = EDGE_FREQUENCY(ae); + ae->probability += prob1; + ae->count += cnt1; + new_edge_frequency = EDGE_FREQUENCY(ae); + change_in_edge_frequency = + new_edge_frequency - original_edge_frequency; + + increment_loop_frequencies(loop_ptr, ae->dest, + change_in_edge_frequency); + } + else + { + ae->probability += prob1; + ae->count += cnt1; + } +#else ae->probability += prob1; ae->count += cnt1; +#endif last = ae; } - + /* Move the rest to one of the edges. */ +#ifdef KELVIN_PATCH + if (edge_originates_in_loop) + { + original_edge_frequency = EDGE_FREQUENCY(last); + last->probability += prob % (n - 1); + last->count += cnt % (n - 1); + new_edge_frequency = EDGE_FREQUENCY(last); + change_in_edge_frequency = new_edge_frequency - original_edge_frequency; + if (change_in_edge_frequency != 0) + { + increment_loop_frequencies(loop_ptr, last->dest, + change_in_edge_frequency); + } + } + else + { + last->probability += prob % (n - 1); + last->count += cnt % (n - 1); + } +#else last->probability += prob % (n - 1); last->count += cnt % (n - 1); +#endif +#ifdef KELVIN_PATCH + if (edge_originates_in_loop) + { + original_edge_frequency = EDGE_FREQUENCY(e); + e->probability = 0; + e->count = 0; + new_edge_frequency = EDGE_FREQUENCY(e); + change_in_edge_frequency = + new_edge_frequency - original_edge_frequency; + increment_loop_frequencies(loop_ptr, e->dest, + change_in_edge_frequency); + } + else + { + e->probability = 0; + e->count = 0; + } +#else e->probability = 0; e->count = 0; +#endif } /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating @@ -1185,6 +1787,14 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, gcc_assert (bbs[0] == loop->header); gcc_assert (bbs[n - 1] == loop->latch); +#ifdef KELVIN_NOISE + /* We copy everything in the loop up to (and including?) the loop header. + */ + fprintf(stderr, + "duplicate_loop_to_header_edge(), iterations: %d, the loop is:\n", + ndupl); + kdn_dump_loop(stderr, loop); +#endif /* Check whether duplication is possible. */ if (!can_copy_bbs_p (bbs, loop->num_nodes)) { @@ -1192,6 +1802,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, return false; } new_bbs = XNEWVEC (basic_block, loop->num_nodes); +#ifdef KELVIN_NOISE + fprintf(stderr, " the edge to loop header (dest of edge) is:\n"); + kdn_dump_edge(stderr, e, TRUE, TRUE); + if (orig) { + fprintf(stderr, + " the orig edge (which is an edge that leaves the loop) is:\n"); + kdn_dump_edge(stderr, orig, TRUE, TRUE); + } else { + fprintf(stderr, " no known loop exit, as orig equals NULL\n"); + } +#endif /* In case we are doing loop peeling and the loop is in the middle of irreducible region, the peeled copies will be inside it too. */ @@ -1200,6 +1821,11 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, /* Find edge from latch. */ latch_edge = loop_latch_edge (loop); +#ifdef KELVIN_NOISE + /* the latch edge is the back edge that branches back to the loop header */ + fprintf(stderr, "loop_latch_edge is:\n"); + kdn_dump_edge(stderr, latch_edge, TRUE, TRUE); +#endif if (flags & DLTHE_FLAG_UPDATE_FREQ) { @@ -1207,16 +1833,56 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, of duplicated loop bodies. */ freq_in = header->frequency; freq_le = EDGE_FREQUENCY (latch_edge); +#ifdef KELVIN_NOISE + /* Note that the preheader dominates the header. All paths into + * the loop pass through the pre-header + */ + fprintf(stderr, + "initial header (freq_in) frequency: %d, freq_le: %d\n", + freq_in, freq_le); + /* Kelvin doesn't understand the following few lines. + * It seems these are handling various "broken" aspects of other + * compiler phases. + */ +#endif if (freq_in == 0) freq_in = 1; if (freq_in < freq_le) freq_in = freq_le; freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; +#ifdef KELVIN_NOISE + fprintf(stderr, "freq_out_orig: %d, EDGE_FREQUENCY(orig): %d\n", + freq_out_orig, EDGE_FREQUENCY(orig)); +#endif if (freq_out_orig > freq_in - freq_le) freq_out_orig = freq_in - freq_le; +#ifdef KELVIN_NOISE + /* kelvin has struggled to understand the significance of "orig" + * and of freq_out_orig. Here's how this works: + * freq_in is the frequency of the loop header + * freq_le is the frequency of the latch edge, which iterates + * the loop. + * the difference between freq_in and freq_le is the + * cumulative frequency of exit edges from the loop. By the + * "law of preservation of mass", what goes out must have + * come in. So freq_in - freq_le equals freq_loop_exit, + * which also equals freq_entry_into_header. And this is the + * frequency we choose to use for each of the new blocks + * that we create during our "duplication activities". + * + */ + fprintf(stderr, " refined freq_out_orig: %d\n", freq_out_orig); +#endif prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); prob_pass_wont_exit = RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); +#ifdef KELVIN_NOISE + /* kelvin thinks pass_thru means we exit the loop, wont_exit + * means we continue execution of the loop. + */ + fprintf(stderr, "prob_pass_thru: %d, prob_pass_wont_exit: %d\n", + prob_pass_thru, prob_pass_wont_exit); +#endif if (orig && REG_BR_PROB_BASE - orig->probability != 0) @@ -1233,14 +1899,35 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) bitmap_set_bit (bbs_to_scale, i); } +#ifdef KELVIN_NOISE + fprintf(stderr, + "Dealing with blocks dominated by a removed exit edge.\n"); + fprintf(stderr, " the loop has %d blocks\n", n); + fprintf(stderr, " scale_after_exit: %d\n", scale_after_exit); + for (unsigned int i = 0; i < n; i++) + { + fprintf(stderr, " block[%d] of the loop will%s be scaled\n", + i, bitmap_bit_p (bbs_to_scale, i)? "": " not"); + } +#endif } scale_step = XNEWVEC (int, ndupl); - + for (i = 1; i <= ndupl; i++) scale_step[i - 1] = bitmap_bit_p (wont_exit, i) ? prob_pass_wont_exit : prob_pass_thru; +#ifdef KELVIN_NOISE + fprintf(stderr, "Making %d duplicates\n", ndupl); + for (i = 1; i <= ndupl; i++) { + fprintf(stderr, " copy %d will%s exit\n", i, + bitmap_bit_p(wont_exit, i)? " not": ""); + fprintf(stderr, " scale_step for this copy is %d\n", + scale_step[i - 1]); + } +#endif + /* Complete peeling is special as the probability of exit in last copy becomes 1. */ @@ -1248,6 +1935,15 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, { int wanted_freq = EDGE_FREQUENCY (e); +#ifdef KELVIN_NOISE + /* kelvin thinks "complete peeling" means there is no longer + * any iteration. For example, unrolling a loop 4 times, if + * the total iteration count is 3 will "completely peel". + */ + fprintf(stderr, + "doing complette_peel, wanted_frequency: %d, freq_in: %d\n", + wanted_freq, freq_in); +#endif if (wanted_freq > freq_in) wanted_freq = freq_in; @@ -1257,11 +1953,18 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, should've managed the flags so all except for original loop has won't exist set. */ scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); +#ifdef KELVIN_NOISE + fprintf(stderr, "scale_act is %d, ndupl: %d\n", scale_act, ndupl); +#endif /* Now simulate the duplication adjustments and compute header frequency of the last copy. */ for (i = 0; i < ndupl; i++) wanted_freq = combine_probabilities (wanted_freq, scale_step[i]); scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in); +#ifdef KELVIN_NOISE + fprintf(stderr, "after looping, wanted_freq: %d, scale_main: %d\n", + wanted_freq, scale_main); +#endif } else if (is_latch) { @@ -1277,6 +1980,10 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, } scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main); scale_act = combine_probabilities (scale_main, prob_pass_main); +#ifdef KELVIN_NOISE + fprintf(stderr, "is_latch: scale_main is %d, scale_act: %d\n", + scale_main, scale_act); +#endif } else { @@ -1284,6 +1991,10 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, for (i = 0; i < ndupl; i++) scale_main = combine_probabilities (scale_main, scale_step[i]); scale_act = REG_BR_PROB_BASE - prob_pass_thru; +#ifdef KELVIN_NOISE + fprintf(stderr, "!is_latch: scale_main is %d, scale_act: %d\n", + scale_main, scale_act); +#endif } for (i = 0; i < ndupl; i++) gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE); @@ -1293,15 +2004,25 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, /* Loop the new bbs will belong to. */ target = e->src->loop_father; - + +#ifdef KELVIN_NOISE + fprintf(stderr, + "The newly copied blocks will belong to this (target) loop:\n"); + kdn_dump_loop(stderr, target); +#endif /* Original loops. */ n_orig_loops = 0; for (aloop = loop->inner; aloop; aloop = aloop->next) n_orig_loops++; +#ifdef KELVIN_NOISE + fprintf(stderr, + " Number of loops immediately nested within the copied loop: %d\n", + n_orig_loops); +#endif orig_loops = XNEWVEC (struct loop *, n_orig_loops); for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++) orig_loops[i] = aloop; - + set_loop_copy (loop, target); first_active = XNEWVEC (basic_block, n); @@ -1310,19 +2031,109 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, memcpy (first_active, bbs, n * sizeof (basic_block)); first_active_latch = latch; } - + spec_edges[SE_ORIG] = orig; spec_edges[SE_LATCH] = latch_edge; + +#ifdef KELVIN_NOISE + fprintf(stderr, + "Before making %d copies of the loop body, method consists of\n", ndupl); + kdn_dump_all_blocks(stderr, loop); +#endif + +#ifdef KELVIN_PATCH + /* recompute the loop body frequencies */ + zero_loop_frequencies (loop); + + basic_block my_header = loop->header; + int sum_incoming_frequencies = 0; + for (unsigned int i = 0; i < EDGE_COUNT(my_header->preds); i++) + { + edge predecessor = EDGE_PRED(my_header, i); +#ifdef KELVIN_NOISE + fprintf(stderr, + " computing incoming frequency by accumulating %d from edge ", + EDGE_FREQUENCY(predecessor)); + kdn_dump_edge(stderr, predecessor, true, true); +#endif + + if (in_loop_p(predecessor->src, loop)) + sum_incoming_frequencies += EDGE_FREQUENCY(predecessor); + else + sum_incoming_frequencies += + (int) (EDGE_FREQUENCY(predecessor) * 111111 + 5000) / 10000; + } +#ifdef KELVIN_NOISE + fprintf(stderr, "After zeroing loop frequencies, increment header with %d\n", + sum_incoming_frequencies); +#endif + increment_loop_frequencies(loop, header, sum_incoming_frequencies); +#endif + +#ifdef KELVIN_NOISE + fprintf(stderr, + "After recomputing loop frequencies in preparation for making copies\n"); + kdn_dump_all_blocks(stderr, loop); +#endif place_after = e->src; for (j = 0; j < ndupl; j++) { +#ifdef KELVIN_NOISE + fprintf(stderr, "Making copy %d, placing copy after block: ", j); + kdn_dump_block_id(stderr, place_after); + /* kelvin has a few questions: + +after making some changes, the problem is still right here. + +the "problem" is that the copy I make of B5 and B6 to become B24 and +B25, and they have copied frequencies 7711 and 7017 respectively. But +that's not right, because the sole predecessor to B24 is B6, and B6 +frequency is 7017, so it doesn't make sense that B24 frequency would +be 7711. + +When B5 and B6 are copied, B5 still has the 9% out-edge to B7, which +explains why B6 has a lower frequency than B5. + +so kelvin's new insight (10/7/2015) is that I should copy the loops +with zero frequency and then propagate the frequency of the target to +its ancestors + + + + * How do B5 and B6 end up with frequencies 7536 and 6858 respectively? + * (in the original unrolled loop, they had frequencies 9100 and 8281) + * --> kelvin says they should have frequency 7711 + * When/how do we remove the out-going edge from B5 to B7? + * (the original unrolled loop has a loop-exit mode from B5 to B7 + * but this edge does not appear in unrolled loop.) + * How is it that only the third copy of the loop body gets + * a loop-exiting edge from B28 to B7?? (note that the loop + * frequencies as generated do properly account for the + * outgoing edge from B28 to B29, because B28 has frequency + * 7536 and B29 has frequency 6858 and the exit edge frequency + * is 678 = 7536 - 6858. + */ +#endif + /* Copy loops. */ copy_loops_to (orig_loops, n_orig_loops, target); +#ifdef KELVIN_NOISE + fprintf(stderr, "Immediately before copy_bbs, place_after is:\n"); + kdn_dump_block_id(stderr, place_after); + fprintf(stderr, " and method contents is\n"); + kdn_dump_all_blocks(stderr, loop); +#endif /* Copy bbs. */ copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop, - place_after, true); + place_after, true); + +#ifdef KELVIN_PATCH + int place_after_frequency = place_after->frequency; + basic_block saved_place_after = place_after; +#endif + place_after = new_spec_edges[SE_LATCH]->src; if (flags & DLTHE_RECORD_COPY_NUMBER) @@ -1352,8 +2163,12 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, } for (i = 0; i < n; i++) new_bbs[i]->flags &= ~BB_DUPLICATED; - } - + } +#ifdef KELVIN_NOISE + fprintf(stderr, "In loop making copies of the loop body\n"); + fprintf(stderr, " is_latch: %d, bitmap_bit_p(wont_exit, j+1): %ld\n", + is_latch, bitmap_bit_p(wont_exit, j+1)); +#endif /* Redirect the special edges. */ if (is_latch) { @@ -1373,20 +2188,58 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, e = new_spec_edges[SE_LATCH]; } + +#ifdef KELVIN_PATCH + zero_partial_loop_frequencies(loop, saved_place_after); + increment_loop_frequencies(loop, + saved_place_after, place_after_frequency); +#else +#endif + +#ifdef KELVIN_NOISE + fprintf(stderr, + "Immediately after copy_bbs, three levels @ place_after:\n"); + kdn_dump_block_flow(stderr, 3, saved_place_after); +#endif + /* Record exit edge in this copy. */ if (orig && bitmap_bit_p (wont_exit, j + 1)) { +#ifdef KELVIN_NOISE + fprintf(stderr, + " bitmap_bit_p(wont_exit, j+1) is TRUE, to_remove is %s\n", + (to_remove == NULL)? "NULL": "not NULL"); +#endif if (to_remove) - to_remove->safe_push (new_spec_edges[SE_ORIG]); + { +#ifdef KELVIN_NOISE + fprintf(stderr, "Arranging to remove this edge\n"); + kdn_dump_edge(stderr, new_spec_edges[SE_ORIG], TRUE, TRUE); +#endif + to_remove->safe_push (new_spec_edges[SE_ORIG]); + } +#ifdef KELVIN_PATCH + set_zero_probability (loop, new_spec_edges[SE_ORIG]); +#else set_zero_probability (new_spec_edges[SE_ORIG]); - +#endif /* Scale the frequencies of the blocks dominated by the exit. */ if (bbs_to_scale) { EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) { +#ifdef KELVIN_NOISE + fprintf(stderr, " before scaling new_bbs, "); + fprintf(stderr, "i is %d, block no: %d, frequency: %d\n", + i, new_bbs[i]->index, new_bbs[i]->frequency); +#endif scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, REG_BR_PROB_BASE); +#ifdef KELVIN_NOISE + fprintf(stderr, " after scaling, "); + fprintf(stderr, "i is %d, block no: %d, frequency: %d\n", + i, new_bbs[i]->index, new_bbs[i]->frequency); +#endif } } } @@ -1399,30 +2252,79 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, first_active_latch = new_bbs[n - 1]; } +#ifdef KELVIN_NOISE + fprintf(stderr, + "Having made the copy, do we scale the frequency: %s\n", + (flags & DLTHE_FLAG_UPDATE_FREQ)? "yes": "no"); +#endif /* Set counts and frequencies. */ if (flags & DLTHE_FLAG_UPDATE_FREQ) { +#ifdef KELVIN_NOISE + fprintf(stderr, " scale_step for this copy is %d\n", scale_step[j]); +#endif scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); scale_act = combine_probabilities (scale_act, scale_step[j]); } +#ifdef KELVIN_NOISE + fprintf(stderr, + "After making copy %d of the loop body, method consists of\n", j); + kdn_dump_all_blocks(stderr, loop); + fprintf(stderr, "Adjusting the place_after block to: "); + kdn_dump_block_id(stderr, place_after); +#endif } free (new_bbs); free (orig_loops); +#ifdef KELVIN_NOISE + fprintf(stderr, "Done making copies of the loop body\n"); +#endif + /* Record the exit edge in the original loop body, and update the frequencies. */ if (orig && bitmap_bit_p (wont_exit, 0)) { +#ifdef KELVIN_NOISE + fprintf(stderr, + " bitmap_bit_p(wont_exit, 0) is TRUE, to_remove is %s\n", + (to_remove == NULL)? "NULL": "not NULL"); +#endif if (to_remove) - to_remove->safe_push (orig); + { +#ifdef KELVIN_NOISE + fprintf(stderr, "Also arranging to remove this edge\n"); + kdn_dump_edge(stderr, orig, TRUE, TRUE); +#endif + to_remove->safe_push (orig); + } +#ifdef KELVIN_PATCH + set_zero_probability (loop, orig); +#else set_zero_probability (orig); +#endif /* Scale the frequencies of the blocks dominated by the exit. */ if (bbs_to_scale) { +#ifdef KELVIN_NOISE + fprintf(stderr, + " scaling bbs frequencies, orig and bitmap_bit_p: %d\n", + scale_after_exit); +#endif EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) { +#ifdef KELVIN_NOISE + fprintf(stderr, + " before scaling, i is %d, block no: %d, frequency: %d\n", + i, bbs[i]->index, bbs[i]->frequency); +#endif scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, REG_BR_PROB_BASE); +#ifdef KELVIN_NOISE + fprintf(stderr, + " i is %d, block no: %d, scaled_frequency is: %d\n", + i, bbs[i]->index, bbs[i]->frequency); +#endif } } } @@ -1432,6 +2334,9 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); if (flags & DLTHE_FLAG_UPDATE_FREQ) { +#ifdef KELVIN_NOISE + fprintf(stderr, " scaling bbs frequencies main: %d\n", scale_main); +#endif scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); free (scale_step); } @@ -1462,6 +2367,13 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, free (bbs); BITMAP_FREE (bbs_to_scale); +#ifdef KELVIN_PATCH + /* This function call is strictly paranoia. it makes no changes + * to the data structures. + */ + check_loop_frequency_integrity(loop); +#endif + return true; } diff --git a/gcc/kelvin-debugs.c b/gcc/kelvin-debugs.c new file mode 100644 index 00000000000..94a6e1fd2ac --- /dev/null +++ b/gcc/kelvin-debugs.c @@ -0,0 +1,801 @@ + +/* + * I'm just going to include this source file into the module that I + * want to debug. Presumably, that outer module will provide all of + * the necessary includes. + +#include <stdio.h> + + +#include "system.h" +#include "coretypes.h" + +#include "basic-block.h" +#include "et-forest.h" +#include "cfgloop.h" +*/ + +#include "pretty-print.h" +#include "et-forest.h" +#include "gimple.h" +#include "input.h" +#include "line-map.h" + +typedef const rtx_insn *const_ins; + + +/* note: rtl.h declares: + * extern void print_insn(pretty_printer *, const_rtx, int) + * but that resolves to: + * extern void print_insn(pretty_printer *, rtx_def const *, int) + * and rtx_def is a superclass of rtx_insn. This results in an + * unresolved reference at link time. + * + * extern void print_insn(pretty_printer *, const rtx_insn *, int); + */ + +static void kdn_dump_loop_id(FILE *, loop_p); +static void kdn_dump_loop(FILE *, loop_p); +static void kdn_dump_block_id(FILE *, basic_block); +static void kdn_dump_block(FILE *, basic_block); +static void kdn_dump_block_flow(FILE *, int, basic_block); +static void kdn_dump_bb_flags(FILE *, int); +static void kdn_dump_edge_flags(FILE *, int); +static void kdn_dump_edge(FILE *, edge, bool, bool); +static void kdn_dump_rtl_instruction_list(FILE *, + const_rtx, struct rtl_bb_info *); +static void kdn_dump_gimple_instruction_list(FILE *, + struct gimple_bb_info gimple); +static void kdn_dump_rtl_insns(FILE *, const_rtx); +static void kdn_dump_gimple_insns(FILE *, gimple_seq); +static void kdn_dump_loop_exits(FILE *stream, struct loop_exit *); +static void kdn_dump_niter_desc(FILE *, struct niter_desc *); +static void kdn_dump_dominator(FILE *, struct et_node *); +static void kdn_treedump_children(FILE *, int, struct et_node *); +static void kdn_dump_wide_int(FILE *stream, const widest_int &value); +static void kdn_dump_all_blocks(FILE *stream, loop_p loop_ptr); + +static void kdn_dump_loop_id(FILE *stream, loop_p loop_ptr) +{ + if (loop_ptr == NULL) + fprintf(stream, "NULL\n"); + else + fprintf(stream, " %d\n", loop_ptr->num); +} + +static void kdn_dump_all_blocks(FILE *stream, loop_p loop_ptr) { + /* find some instruction contained within the loop */ + if (loop_ptr->header == NULL) { + fprintf(stream, + "Could not dump all blocks because loop_ptr->header is NULL.\n"); + } else if (loop_ptr->num_nodes <= 0) { + fprintf(stream, + "Could not dump all blocks because loop has no nodes.\n"); + } else { + basic_block *body, a_block; + const_rtx the_head; + struct rtl_bb_info *the_rtl; + const rtx_insn *an_instruction; + + body = get_loop_body(loop_ptr); + a_block = body[0]; + + the_head = a_block->il.x.head_; + the_rtl = a_block->il.x.rtl; + + if (the_head != NULL) { + fprintf(stream, + "Dumping all basic blocks of the method:\n"); + + an_instruction = as_a <const rtx_insn *> (the_head); + + /* walk through the list of instructions until you find the first + * insruction in the method. This is the one for which the previous + * index value is zero. + */ + while (PREV_INSN(an_instruction) != NULL) { + an_instruction = PREV_INSN(an_instruction); + } + + /* At this point, an_instruction points to first instruction + * of the method. + */ + /* while our focus is not yet at the end of the method's instruction + * list: + * print out the contents of one basic block + * advance the focus to the instruction that follows the + * basic block just printed out. + */ + + while (an_instruction != NULL) { + int loop_is_done = FALSE; + int seen_one_basic_block_note = FALSE; + basic_block the_block; + + fprintf(stream, "\nContents of one basic block:\n"); + while ((an_instruction != NULL) && !loop_is_done) { + /* + * instead of printing out instructions one at a time, + * i'll just skip over the instructions of each basic block + * while remembering the basic block information. + * + * print_rtl_single(stream, an_instruction); + */ + + the_block = BLOCK_FOR_INSN(an_instruction); + if (NOTE_INSN_BASIC_BLOCK_P(an_instruction)) { + seen_one_basic_block_note = TRUE; + } + else { + switch (GET_CODE(an_instruction)) { + + case JUMP_INSN: + loop_is_done = TRUE; + break; + + case CALL_INSN: /* not sure if CALL_INSN terminates a bb */ + /* this code was copied from cfgbuild.c */ + /* Noreturn and sibling call instructions terminate the + * basic blocks (but only if they happen unconditionally). + */ + if ((SIBLING_CALL_P (an_instruction) + || find_reg_note (an_instruction, REG_NORETURN, 0)) + && GET_CODE (PATTERN (an_instruction)) != COND_EXEC) { + loop_is_done = TRUE; + break; + } + + default: /* in other cases, do nothing */ + break; + } + } + + an_instruction = NEXT_INSN(an_instruction); + + if (an_instruction != NULL) { + if (NOTE_INSN_BASIC_BLOCK_P(an_instruction)) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } else if (GET_CODE(an_instruction) == CODE_LABEL) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } + } + } + kdn_dump_block(stream, the_block); + fprintf(stream, "End of this basic block.\n"); + } + fprintf(stream, "End of basic block's head:\n"); + } + } +} + +static void kdn_dump_loop(FILE *stream, loop_p loop_ptr) { + + fprintf(stream, " %d\n", loop_ptr->num); + fprintf(stream, " number of instructions: %d\n", loop_ptr->ninsns); + fprintf(stream, " number of blocks within loop: %d\n", loop_ptr->num_nodes); + fprintf(stream, " average instructions per iteration: %d\n", + loop_ptr->av_ninsns); + + if (loop_ptr->any_upper_bound) { + fprintf(stream, + " upper bound on number of times loop latch executes: "); + kdn_dump_wide_int(stream, loop_ptr->nb_iterations_upper_bound); + } + + if (loop_ptr->any_estimate) { + fprintf(stream, + " estimate on number of times loop latch executes: "); + kdn_dump_wide_int(stream, loop_ptr->nb_iterations_estimate); + } + + fprintf(stream, " loop can be parallel: %d\n", loop_ptr->can_be_parallel); + fprintf(stream, " warned of aggressive optimizations: %d\n", + loop_ptr->warned_aggressive_loop_optimizations); + + fprintf(stream, " force_vectorize: %d, dont_vectorize: %d\n", + loop_ptr->force_vectorize, loop_ptr->dont_vectorize); + + if (loop_ptr->inner != NULL) { + fprintf(stream, " inner_loop: "); + kdn_dump_loop_id(stream, loop_ptr->inner); + } + + fprintf(stream, " outer_loops:\n"); + if (loop_ptr->superloops) { + for (unsigned int i = 0; i < vec_safe_length(loop_ptr->superloops); i++) { + fprintf(stream, " "); + kdn_dump_loop_id(stream, (*loop_ptr->superloops)[i]); + } + } + + fprintf(stream, " next loop is: "); + kdn_dump_loop_id(stream, loop_ptr->next); + + fprintf(stream, " loop header: "); + kdn_dump_block_id(stream, loop_ptr->header); + + fprintf(stream, " former header: "); + kdn_dump_block_id(stream, loop_ptr->former_header); + + fprintf(stream, " loop latch: "); + kdn_dump_block_id(stream, loop_ptr->latch); + + fprintf(stream, " loop exits: "); + kdn_dump_loop_exits(stream, loop_ptr->exits); + + fprintf(stream, " loop iteration information\n"); + kdn_dump_niter_desc(stream, loop_ptr->simple_loop_desc); + +#define DUMP_ENTIRE_BLOCK +#ifdef DUMP_ENTIRE_BLOCK + kdn_dump_all_blocks(stream, loop_ptr); +#endif + + fprintf(stream, + "Not reported: aux, nb_iterations, safelen, estimate_state,"); + fprintf(stream, + " simduid bounds, control_ivs, lpt_decision\n"); +} + +static void kdn_dump_block_id(FILE *stream, basic_block block) { + if (block == NULL) + fprintf(stream, "Null Basic Block\n"); + else + fprintf(stream, "Basic Block No: %d\n", block->index); +} + +/* Dump this block and all of its successor blocks transitively up + * to the num_levels distance from block. + */ +static void kdn_dump_block_flow(FILE *stream, + int num_levels, basic_block block) { + if (num_levels > 0) { + kdn_dump_block(stream, block); + for (unsigned int i = 0; i < EDGE_COUNT(block->succs); i++) { + kdn_dump_block_flow(stream, num_levels - 1, EDGE_SUCC(block, i)->dest); + } + } +} + +/* Note: basic_block is "struct basic_block_def *" */ +static void kdn_dump_block(FILE *stream, basic_block block) { + unsigned int i; + + if (block == NULL) + fprintf(stream, "NULL block\n"); + else { + fprintf(stream, "Basic block no: %d\n", block->index); + + /* The discriminator for this block. The discriminator distinguishes + among several basic blocks that share a common locus, allowing for + more accurate sample-based profiling. */ + fprintf(stream, " discriminator: %d\n", block->discriminator); + + if (block->loop_father) { + fprintf(stream, " inner-most enclosing loop: "); + kdn_dump_loop_id(stream, block->loop_father); + } + + /* enumerate from cfg-flags.def */ + fprintf(stream, " flags: "); + kdn_dump_bb_flags(stream, block->flags); + fprintf(stream, " count: %ld\n", (long int) block->count); + fprintf(stream, " frequency: %d\n", block->frequency); + + fprintf(stream, " Dominator information\n"); + kdn_dump_dominator(stream, block->dom[0]); + + fprintf(stream, " Post-dominator information\n"); + kdn_dump_dominator(stream, block->dom[1]); + + fprintf(stream, " Predecessor edges:\n"); + for (i = 0; i < EDGE_COUNT(block->preds); i++) { + fprintf(stream, " predecessor[%d]:\n", i); + kdn_dump_edge(stream, EDGE_PRED(block, i), TRUE, FALSE); + } + + fprintf(stream, " Successor edges:\n"); + for (i = 0; i < EDGE_COUNT(block->succs); i++) { + fprintf(stream, " successor[%d]:\n", i); + kdn_dump_edge(stream, EDGE_SUCC(block, i), FALSE, TRUE); + } + + fprintf(stream, " body of block:\n"); + if (block->flags & BB_RTL) + kdn_dump_rtl_instruction_list(stream, + block->il.x.head_, block->il.x.rtl); + else + kdn_dump_gimple_instruction_list(stream, block->il.gimple); + + fprintf(stream, "End of info for basic block no: %d\n", block->index); + } +} + +static void kdn_dump_bb_flags(FILE *stream, int flags) { + + /* see cfg-flags.def */ + if (flags & BB_NEW) + fprintf(stream, "NEW "); + if (flags & BB_REACHABLE) + fprintf(stream, "REACHABLE "); + if (flags & BB_IRREDUCIBLE_LOOP) + fprintf(stream, "IRREDUCIBLE_LOOP "); + if (flags & BB_SUPERBLOCK) + fprintf(stream, "SUPERBLOCK "); + if (flags & BB_DISABLE_SCHEDULE) + fprintf(stream, "DISABLE_SCHEDULE "); + if (flags & BB_HOT_PARTITION) + fprintf(stream, "HOT_PARTITION "); + if (flags & BB_COLD_PARTITION) + fprintf(stream, "COLD_PARTITION "); + if (flags & BB_DUPLICATED) + fprintf(stream, "DUPLICATED "); + if (flags & BB_NON_LOCAL_GOTO_TARGET) + fprintf(stream, "NON_LOCAL_GOTO_TARGET "); + if (flags & BB_RTL) + fprintf(stream, "RTL "); + if (flags & BB_FORWARDER_BLOCK) + fprintf(stream, "FORWARDER_BLOCK "); + if (flags & BB_NONTHREADABLE_BLOCK) + fprintf(stream, "NONTHREADABLE_BLOCK "); + if (flags & BB_MODIFIED) + fprintf(stream, "MODIFIED "); + if (flags & BB_VISITED) + fprintf(stream, "VISITED "); + if (flags & BB_IN_TRANSACTION) + fprintf(stream, "IN_TRANSACTION "); + + fprintf(stream, "\n"); +} + +static void kdn_dump_edge_flags(FILE *stream, int flags) { + + /* see cfg-flags.def */ + if (flags & EDGE_FALLTHRU) + fprintf(stream, "FALLTHRU "); + if (flags & EDGE_ABNORMAL) + fprintf(stream, "ABNORMAL "); + if (flags & EDGE_ABNORMAL_CALL) + fprintf(stream, "ABNORMAL_CALL "); + if (flags & EDGE_EH) + fprintf(stream, "EH "); + if (flags & EDGE_PRESERVE) + fprintf(stream, "PRESERVE "); + if (flags & EDGE_FAKE) + fprintf(stream, "FAKE "); + if (flags & EDGE_DFS_BACK) + fprintf(stream, "DFS_BACK "); + if (flags & EDGE_IRREDUCIBLE_LOOP) + fprintf(stream, "IRREDUCIBLE_LOOP "); + if (flags & EDGE_TRUE_VALUE) + fprintf(stream, "TRUE_VALUE "); + if (flags & EDGE_FALSE_VALUE) + fprintf(stream, "FALSE_VALUE "); + if (flags & EDGE_EXECUTABLE) + fprintf(stream, "EXECUTABLE "); + if (flags & EDGE_CROSSING) + fprintf(stream, "CROSSING "); + if (flags & EDGE_SIBCALL) + fprintf(stream, "SIBCALL "); + if (flags & EDGE_CAN_FALLTHRU) + fprintf(stream, "CAN_FALLTHRU "); + if (flags & EDGE_LOOP_EXIT) + fprintf(stream, "LOOP_EXIT "); + if (flags & EDGE_TM_UNINSTRUMENTED) + fprintf(stream, "TM_UNINSTRUMENTED "); + if (flags & EDGE_TM_ABORT) + fprintf(stream, "TM_ABORT "); + fprintf(stream, "\n"); +} + +static void kdn_dump_edge(FILE *stream, edge an_edge, + bool display_src, bool display_dest) { + /* note: an_edge is a pointer to struct edge_def */ + if (an_edge == NULL) { + fprintf(stream, "Null Edge\n"); + } else { + fprintf(stream, + "Edge (B%d->B%d)\n", + an_edge->src->index, an_edge->dest->index); + fprintf(stream, " flags: "); + kdn_dump_edge_flags(stream, an_edge->flags); + fprintf(stream, " probability: %d\n", an_edge->probability); + fprintf(stream, " count: %ld\n", (long int) an_edge->count); + + /* + * trying to output got_locus information, but it's not working yet. + */ + fprintf(stream, " goto locus: (file: %s) ", + LOCATION_FILE(an_edge->goto_locus)); + fprintf(stream, "(line: %d) ", LOCATION_LINE(an_edge->goto_locus)); + fprintf(stream, "(column: %d)\n", LOCATION_COLUMN(an_edge->goto_locus)); + + if (an_edge->flags & EDGE_EXECUTABLE) { + fprintf(stream, " GIMPLE instructions:\n"); + kdn_dump_gimple_insns(stream, an_edge->insns.g); + fprintf(stream, " End of GIMPLE instructions\n"); + } + else if (an_edge->insns.r != NULL) { + fprintf(stream, " RTL instructions:\n"); + kdn_dump_rtl_insns(stream, an_edge->insns.r); + fprintf(stream, " End of RTL instructions\n"); + } + + fprintf(stream, " not reported: dest_idx, aux\n"); + } +} + +static void kdn_dump_rtl_instruction_list(FILE *stream, + const_rtx head, + struct rtl_bb_info *rtl) { + /* see rtl.h for definition of "class rtx_insn" + * supports operations such as + * INSN_UID(), NEXT_INSN(), PREV_INSN() + * Note that a const_rtx does not have a next field. const_rtx is + * the "super-class" of rtx_insn. + */ + const_ins notes_and_jumptables; + struct pretty_printer buffer; + + pp_needs_newline(&buffer) = true; + buffer.buffer->stream = stream; + + if (rtl != NULL) { + int loop_is_done = FALSE; + int seen_one_basic_block_note = FALSE; + + fprintf(stream, "Basic block's header:\n"); + for (notes_and_jumptables = rtl->header_; + (notes_and_jumptables != NULL) && !loop_is_done; ) { + + print_rtl_single(stream, notes_and_jumptables); + + if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) { + seen_one_basic_block_note = TRUE; + } + else { + + switch (GET_CODE(notes_and_jumptables)) { + + case JUMP_INSN: + loop_is_done = TRUE; + break; + + case CALL_INSN: + /* this code was copied from cfgbuild.c */ + /* Noreturn and sibling call instructions terminate the basic blocks + (but only if they happen unconditionally). */ + if ((SIBLING_CALL_P (notes_and_jumptables) + || find_reg_note (notes_and_jumptables, REG_NORETURN, 0)) + && GET_CODE (PATTERN (notes_and_jumptables)) != COND_EXEC) { + loop_is_done = TRUE; + break; + } + + default: /* in other cases, do nothing */ + break; + } + } + + notes_and_jumptables = NEXT_INSN(notes_and_jumptables); + + if (notes_and_jumptables != NULL) { + if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } else if (GET_CODE(notes_and_jumptables) == CODE_LABEL) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } + } + } + fprintf(stream, "End of basic block's header\n"); + } + + fprintf(stream, "Basic block's head:\n"); + + if (head != NULL) { + /* note: print_rtl prints multiple basic blocks here. not sure + * whose fault this is. + */ + int loop_is_done = FALSE; + int seen_one_basic_block_note = FALSE; + +#undef EXTRA_KDN_DEBUGGING +#ifdef EXTRA_KDN_DEBUGGING + fprintf(stream, + "NOTE_INSN_BASIC_BLOCK: %d, JUMP_INSN: %d, CODE_LABEL: %d\n", + NOTE_INSN_BASIC_BLOCK, JUMP_INSN, CODE_LABEL); +#endif + + for (const rtx_insn *head_iterator = as_a <const rtx_insn *> (head); + (head_iterator != NULL) && !loop_is_done; ) { + +#ifdef EXTRA_KDN_DEBUGGING + fprintf(stream, + "printing single rtl, loop_is_done: %d, seen_one_bb: %d\n", + loop_is_done, seen_one_basic_block_note); + fprintf(stream, + " code of this insn is %d\n", GET_CODE(head_iterator)); +#endif + + print_rtl_single(stream, head_iterator); + + if (NOTE_INSN_BASIC_BLOCK_P(head_iterator)) { + seen_one_basic_block_note = TRUE; + } + else { + switch (GET_CODE(head_iterator)) { + + case JUMP_INSN: + loop_is_done = TRUE; + break; + + case CALL_INSN: /* not sure if CALL_INSN terminates a bb */ + /* this code was copied from cfgbuild.c */ + /* Noreturn and sibling call instructions terminate the basic blocks + (but only if they happen unconditionally). */ + if ((SIBLING_CALL_P (head_iterator) + || find_reg_note (head_iterator, REG_NORETURN, 0)) + && GET_CODE (PATTERN (head_iterator)) != COND_EXEC) { + loop_is_done = TRUE; + break; + } + + default: /* in other cases, do nothing */ + break; + } + } + + head_iterator = NEXT_INSN(head_iterator); + + if (head_iterator != NULL) { + if (NOTE_INSN_BASIC_BLOCK_P(head_iterator)) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } else if (GET_CODE(head_iterator) == CODE_LABEL) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } + } + } + } + fprintf(stream, "End of basic block's head:\n"); + + if (rtl != NULL) { + fprintf(stream, "Basic block's footer:\n"); + int loop_is_done = FALSE; + int seen_one_basic_block_note = FALSE; + + for (notes_and_jumptables = rtl->footer_; + (notes_and_jumptables != NULL) && !loop_is_done; ) { + + print_rtl_single(stream, notes_and_jumptables); + + if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) { + seen_one_basic_block_note = TRUE; + } + else { + switch (GET_CODE(notes_and_jumptables)) { + + case JUMP_INSN: + loop_is_done = TRUE; + break; + + case CALL_INSN: + /* this code was copied from cfgbuild.c */ + /* Noreturn and sibling call instructions terminate the basic blocks + (but only if they happen unconditionally). */ + if ((SIBLING_CALL_P (notes_and_jumptables) + || find_reg_note (notes_and_jumptables, REG_NORETURN, 0)) + && GET_CODE (PATTERN (notes_and_jumptables)) != COND_EXEC) { + loop_is_done = TRUE; + break; + } + + default: /* in other cases, do nothing */ + break; + } + } + + notes_and_jumptables = NEXT_INSN(notes_and_jumptables); + + if (notes_and_jumptables != NULL) { + if (NOTE_INSN_BASIC_BLOCK_P(notes_and_jumptables)) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } else if (GET_CODE(notes_and_jumptables) == CODE_LABEL) { + if (seen_one_basic_block_note) { + loop_is_done = TRUE; + } + } + } + } + fprintf(stream, "End of basic block's footer\n"); + } +} + +/* ignore warning message about gimple not used */ +static void kdn_dump_gimple_instruction_list(FILE *stream, + struct gimple_bb_info gimple) { + /* see gimple-pretty-print.c for relevant subroutines */ + /* reference to visited silences warning about arg not used */ + fprintf(stream, + "Not Implemented: dumping of gimple instructions, visited: %d\n", + gimple.seq->visited); +} + +static void kdn_dump_rtl_insns(FILE *stream, const_rtx rtx_insns) { + if (rtx_insns != NULL) { + struct pretty_printer buffer; + pp_needs_newline(&buffer) = true; + buffer.buffer->stream = stream; + + print_rtl(stream, rtx_insns); + } +} + +static void kdn_dump_gimple_insns(FILE *stream, gimple_seq gimp_insns) { + /* see gimple-pretty-print.c for relevant subroutines */ + /* reference to visited silences warning about arg not used */ + fprintf(stream, + "Not Implemented: dumping of gimple instructions, visited: %d\n", + gimp_insns->visited); +} + +static void kdn_dump_loop_exits(FILE *stream, struct loop_exit *exits) { + edge e; + loop_exit *walker = exits; + + /* exits is as an infinite cycle and never equals null, apparently */ + fprintf(stream, "Loop exits consist of edges: "); + do { + e = exits->e; + kdn_dump_edge(stream, e, TRUE, TRUE); + walker = walker->next; + } while (walker != exits); +} + +static void kdn_dump_niter_desc(FILE *stream, struct niter_desc *loop_desc) { + + if (loop_desc == NULL) { + fprintf(stream, "No iteration information for this loop\n"); + + } else { + + fprintf(stream, " Edge that leaves loop: "); + kdn_dump_edge(stream, loop_desc->out_edge, TRUE, TRUE); + + fprintf(stream, " Edge from same source that continues loop: "); + kdn_dump_edge(stream, loop_desc->in_edge, TRUE, TRUE); + + fprintf(stream, " What do we know? %s\n", + (loop_desc->simple_p)? "something": "nothing"); + + fprintf(stream, " Is number of iterations constant? %s\n", + (loop_desc->const_iter)? "yes": "no"); + + if (loop_desc->const_iter) { + fprintf(stream, " Constant number of iterations: %ld\n", + (long) loop_desc->niter); + } + + /* + fprintf(stream, " Number of iterations:\n"); + kdn_dump_rtl_insns(stream, loop_desc->niter_expr); + fprintf(stream, "\n"); + */ + fprintf(stream, + " Not reported: assumptions, noloop_assumptions, infinite,"); + fprintf(stream, " niter_expr, mode, signed_p\n"); + } +} + + +static void kdn_dump_dominator(FILE *stream, struct et_node *dominator) { + basic_block a_block; + + if (dominator == NULL) { + fprintf(stream, + " [Post?] Domination tree is empty\n"); + } else { + + fprintf(stream, + " [Post?] Domination tree [dfs_num_in: %d, dfs_num_out: %d]\n", + dominator->dfs_num_in, dominator->dfs_num_out); + + /* Testing confirms that dominator->data points to a basic block */ + a_block = (basic_block) dominator->data; + + fprintf(stream, " "); + kdn_dump_block_id(stream, a_block); + + if (dominator->son != NULL) { + kdn_treedump_children(stream, 8, dominator->son); + } + } +} + +static void kdn_treedump_children(FILE *stream, + int indent_level, struct et_node *a_son) { + /* assert: a_son != NULL */ + struct et_node *first_son = a_son; + + do { + basic_block a_block = (basic_block) a_son->data; + + fprintf(stream, "%*s", indent_level, ": "); + kdn_dump_block_id(stream, a_block); + + if (a_son->son != NULL) { + kdn_treedump_children(stream, indent_level + 2, a_son->son); + } + a_son = a_son->left; + } while (a_son != first_son); +} + +static void kdn_dump_wide_int(FILE *stream, const widest_int &orig_big_no) { + widest_int working_memory; + widest_int wide_zero, wide_ten; + int precision = orig_big_no.get_precision(); + /* ISO C++ prohibits variable length array declarations, so I need a + * different way to allocate this array. + * + * char buffer[precision+2]; + */ + char *buffer = (char *) alloca(precision+2); + char *end_buffer = &buffer[precision+1]; + bool negative = FALSE; + + /* implicit invocation of widest_int constructors */ + wide_zero = 0; + wide_ten = 10; + + wi::copy(working_memory, orig_big_no); + if (wi::only_sign_bit_p(working_memory)) { + wi::neg(working_memory); + negative = TRUE; + } + + *end_buffer-- = '\0'; + for (int i = 0; i < precision; i++) { + widest_int remainder; + unsigned int length; + const HOST_WIDE_INT *value; + + remainder = wi::smod_trunc(working_memory, wide_ten); + length = remainder.get_len(); + + /* value gets a pointer to the insides of the remainder object */ + value = remainder.get_val(); + + /* kelvin says: i think i want to assert that length > 0 */ + if (length > 0) { + *end_buffer-- = '0' + value[length-1]; + } + + /* divide working_memory by ten, since we've captured the least + * significant bit already. + */ + working_memory = wi::sdiv_trunc(working_memory, wide_ten); + + if (wi::cmps(working_memory, wide_zero) == 0) { + break; + } + } + + if (negative) { + *end_buffer-- = '-'; + } + end_buffer++; + fprintf(stream, "[kdn big number] %s\n", end_buffer); +} diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c index 913bd3c2614..d081f969eca 100644 --- a/gcc/loop-unroll.c +++ b/gcc/loop-unroll.c @@ -45,6 +45,13 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "dumpfile.h" +#define KELVIN_PATCH +#define KELVIN_NOISE +#ifdef KELVIN_NOISE +#include "kelvin-debugs.c" +#endif + + /* This pass performs loop unrolling. We only perform this optimization on innermost loops (with single exception) because the impact on performance is greatest here, and we want to avoid @@ -479,6 +486,26 @@ unroll_loop_constant_iterations (struct loop *loop) struct opt_info *opt_info = NULL; bool ok; +#ifdef KELVIN_NOISE + basic_block *body; + body = get_loop_body (loop); + fprintf(stderr, "At top of unroll_loop_constant_iterations() with loop:"); + kdn_dump_loop(stderr, loop); + fprintf(stderr, " this loop is comprised of %d basic blocks\n", + loop->num_nodes); + + /* kdn: body is a newly allocated (XNEWVEC) array of basic_block + * references. Note that basic_block is "struct basic_block_def *". + * The number of values held in the body array is given by + * loop->num_nodes. + */ + for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) { + fprintf(stderr, "body block[%d]:\n", _i_); + kdn_dump_block(stderr, body[_i_]); + } + free(body); +#endif + niter = desc->niter; /* Should not get here (such loop should be peeled instead). */ @@ -634,6 +661,27 @@ unroll_loop_constant_iterations (struct loop *loop) FOR_EACH_VEC_ELT (remove_edges, i, e) remove_path (e); +#ifdef KELVIN_NOISE + fprintf(stderr, "At bottom of unroll_loop_constant_iterations(), "); + fprintf(stderr, "having unrolled %d times\n", max_unroll); + fprintf(stderr, " Loop has %d instructions, ", num_loop_insns(loop)); + fprintf(stderr, " distributed across %d basic blocks.\n", loop->num_nodes); + kdn_dump_loop(stderr, loop); + + body = get_loop_body(loop); + + /* kdn: body is a newly allocated (XNEWVEC) array of basic_block + * references. Note that basic_block is "struct basic_block_def *". + * The number of values held in the body array is given by + * loop->num_nodes. + */ + for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) { + fprintf(stderr, "body block[%d]:\n", _i_); + kdn_dump_block(stderr, body[_i_]); + } + free(body); +#endif + if (dump_file) fprintf (dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n", @@ -876,6 +924,26 @@ unroll_loop_runtime_iterations (struct loop *loop) auto_vec<basic_block> dom_bbs; body = get_loop_body (loop); +#ifdef KELVIN_NOISE + fprintf(stderr, "At top of unroll_loop_runtime_iterations() with loop:"); + kdn_dump_loop(stderr, loop); + fprintf(stderr, " this loop is comprised of %d basic blocks\n", + loop->num_nodes); + /* + fprintf(stderr, " loop desc is represented by:\n"); + kdn_dump_niter_desc(stderr, desc); + */ + /* kdn: body is a newly allocated (XNEWVEC) array of basic_block + * references. Note that basic_block is "struct basic_block_def *". + * The number of values held in the body array is given by + * loop->num_nodes. + */ + for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) { + fprintf(stderr, "body block[%d]:\n", _i_); + kdn_dump_block(stderr, body[_i_]); + } +#endif + for (i = 0; i < loop->num_nodes; i++) { vec<basic_block> ldom; @@ -929,6 +997,10 @@ unroll_loop_runtime_iterations (struct loop *loop) /* Precondition the loop. */ split_edge_and_insert (loop_preheader_edge (loop), init_code); +#ifdef KELVIN_NOISE + fprintf(stderr, "After preconditioning the loop, the loop looks like:\n"); + kdn_dump_loop(stderr, loop); +#endif auto_vec<edge> remove_edges; @@ -943,32 +1015,170 @@ unroll_loop_runtime_iterations (struct loop *loop) && !desc->noloop_assumptions) bitmap_set_bit (wont_exit, 1); ezc_swtch = loop_preheader_edge (loop)->src; + +#ifdef KELVIN_NOISE + /* extra zero check switch (ezc) processing */ + fprintf(stderr, + "ezc_switch loop preheader before calling duplicate_loop...\n"); + kdn_dump_block(stderr, ezc_swtch); +#endif ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1, wont_exit, desc->out_edge, &remove_edges, DLTHE_FLAG_UPDATE_FREQ); gcc_assert (ok); +#ifdef KELVIN_NOISE + fprintf(stderr, "After duplicating loop to header edge, the loop is\n"); + kdn_dump_loop(stderr, loop); +#endif /* Record the place where switch will be built for preconditioning. */ swtch = split_edge (loop_preheader_edge (loop)); +#ifdef KELVIN_PATCH + /* kelvin: next three lines inserted by PTH */ + int iter_freq, new_freq; + iter_freq = new_freq = swtch->frequency / (n_peel+1); +#ifdef KELVIN_NOISE + fprintf(stderr, "\n Before overwriting, swtch->frequency is %d\n", + swtch->frequency); + /* Overwriting swtch->frequency below is what causes B11 to have the + * correct frequency. But I have to scale the frequencies for each of + * the loop's blocks that are copied as well. + */ +#endif + swtch->frequency = new_freq; +#endif + +#ifdef KELVIN_NOISE + fprintf(stderr, "\n n_peel is %d, iter_freq is %d\n", n_peel, iter_freq); + fprintf(stderr, " The newly created swtch block is given by:\n"); + kdn_dump_block(stderr, swtch); +#endif + for (i = 0; i < n_peel; i++) { /* Peel the copy. */ bitmap_clear (wont_exit); if (i != n_peel - 1 || !last_may_exit) bitmap_set_bit (wont_exit, 1); +#ifdef KELVIN_PATCH + /* kelvin: up to ok below is all new */ + // PTH - Need to adjust freq of pred block before duplicating loop + // preheader->src didn't work (didn't see any change) + // preheader->dest didn't work (peeled blocks had freq = 0) + // preheader->dest + not passing FLAG_UPDATE_FREQ fixed peeled copies + // but screwed up loop block bb5=558 (bb8(exit, + // due to bb5 freq) and bb23(freq=7536)) + // kelvin says PTH had commented out the following line, + // apparently because it creates some problems with frequency of + // bb5. + // loop_preheader_edge (loop)->dest->frequency = new_freq; +#ifdef KELVIN_NOISE + fprintf(stderr, + "preparing to invoke duplicate_loop_to_header_edge, new_freq: %d\n", + new_freq); + fprintf(stderr, "loop_preheader_edge(loop) is:\n"); + kdn_dump_edge(stderr, loop_preheader_edge(loop), TRUE, TRUE); +#endif + + /* kelvin patch begins here */ + int saved_header_frequency = loop->header->frequency; + int saved_latch_frequency = loop->latch->frequency; + loop->header->frequency /= (n_peel + 1); + loop->latch->frequency /= (n_peel + 1); + + loop->header->frequency *= (i + 1); + loop->latch->frequency *= (i + 1); + +#ifdef KELVIN_NOISE + fprintf(stderr, + "Temporarily setting header and latch frequencies to %d and %d\n", + loop->header->frequency, loop->latch->frequency); +#endif + /* kelvin patch ends here */ +#endif ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1, wont_exit, desc->out_edge, &remove_edges, DLTHE_FLAG_UPDATE_FREQ); +#ifdef KELVIN_PATCH + loop->header->frequency = saved_header_frequency; + loop->latch->frequency = saved_latch_frequency; + fprintf(stderr, "Restoring header and latch frequencies to %d and %d\n", + loop->header->frequency, loop->latch->frequency); +#endif gcc_assert (ok); /* Create item for switch. */ j = n_peel - i - (extra_zero_check ? 0 : 1); p = REG_BR_PROB_BASE / (i + 2); +#ifdef KELVIN_NOISE + fprintf(stderr, + "Before splitting the next edge, loop_preaheader_edge is:\n"); + kdn_dump_edge(stderr, loop_preheader_edge (loop), TRUE, TRUE); +#endif preheader = split_edge (loop_preheader_edge (loop)); +#ifdef KELVIN_NOISE + fprintf(stderr, "In peel loop, i: %d, j: %d, preheader is:\n", i, j); + kdn_dump_block(stderr, preheader); + /* note that the preheader has the correct frequency, but the + * nodes created by duplicate_loop_to_header_edge have an incorrect + * frequency. + * + * Kelvin questions: how does preheader get its frequency? It + * is copied from the original loop, and then overwritten below. + */ + fprintf(stderr, "Refined loop definition is:\n"); + kdn_dump_loop(stderr, loop); + + /* kelvin believes the following code is responsible for assigning + * frequencies on B14, B18, and B22. + */ + fprintf(stderr, "Processing preheader block: "); + kdn_dump_block_id(stderr, preheader); + fprintf(stderr, + "Before overwriting the preheader, frequency is %d, ", + preheader->frequency); + fprintf(stderr, "to be overwritten with %d (or not!)*\n", + new_freq + iter_freq); + + /* + * note that each time we peel another loop iteration, we + * increment new_freq by the value of iter_freq, so the + * first iteration has value 186, the second 372, the third, + * 558, an so on. + */ + +#endif +#ifdef KELVIN_PATCH + /* Pat Haugen's patch below is not quite correct. In the + * case that the duplicated blocks include an "exit" which + * circumvents execution of the loop, we need to subtract + * from preheader->frequency the cumulative frequencies + * associated with the duplicated "loop" exit edges. + */ + + + /* kelvin: the following two lines constitute a patch provided + * by Pat Haugen. This works most of the time, but it doesn't + * work in the case that the copied blocks include conditional + * branches that circumvent execution of the loop that follows. + * To do this right, I need to calculate the frequency of this + * preheader in a different way. Basically, I need to calculate + * the preheader frequency by summing frequences of predecessor + * nodes, AFTER we create and establish the frequencies for the + * new swtch block. + * + * new_freq = new_freq + iter_freq; + * preheader->frequency = new_freq; + */ + + /* kelvin is suspicious of the code above, which is calculating + * the preheader->frequency. + */ +#endif branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, block_label (preheader), p, NULL); @@ -978,13 +1188,58 @@ unroll_loop_runtime_iterations (struct loop *loop) gcc_assert (branch_code != NULL_RTX); swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); +#ifdef KELVIN_NOISE + fprintf(stderr, "The newly created swtch is:\n"); + kdn_dump_block(stderr, swtch); +#endif set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); +#ifdef KELVIN_PATCH + /* kelvin: following line modified by pth, was single_pred_edge */ + single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; +#else single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; +#endif e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p); e->probability = p; + +#ifdef KELVIN_NOISE + fprintf(stderr, "The newly created edge is:\n"); + kdn_dump_edge(stderr, e, TRUE, TRUE); +#endif +#ifdef KELVIN_PATCH + /* kelvin: following line inserted by PTH is replaced with + * the alternative sequence of instructions crafted by kelvin. + * swtch->frequency = new_freq; + */ + new_freq = new_freq + iter_freq; + swtch->frequency = new_freq; + + int prehead_frequency = 0; + for (unsigned int i = 0; i < EDGE_COUNT(preheader->preds); i++) { + edge an_edge = EDGE_PRED(preheader, i); + int the_edge_frequency = EDGE_FREQUENCY(an_edge); +#ifdef KELVIN_NOISE + fprintf(stderr, "Summing predecessor edge: "); + kdn_dump_edge(stderr, an_edge, TRUE, TRUE); + fprintf(stderr, " which has frequency %d\n", the_edge_frequency); +#endif + prehead_frequency += the_edge_frequency; + } +#ifdef KELVIN_NOISE + fprintf(stderr, "Overwriting preheader block "); + kdn_dump_block_id(stderr, preheader); + fprintf(stderr, " with frequency %d\n", prehead_frequency); +#endif + preheader->frequency = prehead_frequency; + +#endif } +#ifdef KELVIN_NOISE + fprintf(stderr, "\nAt midpoint of unroll_loop_runtime_iterations() with loop:"); + kdn_dump_loop(stderr, loop); +#endif if (extra_zero_check) { @@ -1015,16 +1270,41 @@ unroll_loop_runtime_iterations (struct loop *loop) bitmap_clear_bit (wont_exit, may_exit_copy); opt_info_start_duplication (opt_info); +#ifdef KELVIN_NOISE + fprintf(stderr, + "Invoking duplicate_loop_to_header_edge for unrolling loop\n"); +#endif +#ifdef KELVIN_PATCH ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge, &remove_edges, +/* kelvin: following code commented out and replaced by PTH DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)); +*/ opt_info + ? DLTHE_RECORD_COPY_NUMBER + : 0); +#else + ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), + max_unroll, + wont_exit, desc->out_edge, + &remove_edges, + DLTHE_FLAG_UPDATE_FREQ + | (opt_info + ? DLTHE_RECORD_COPY_NUMBER + : 0)); +#endif gcc_assert (ok); +#ifdef KELVIN_NOISE + fprintf(stderr, "After duplicating loop to header edge for the loop body\n"); + fprintf(stderr, " exit_at_end is %d\n", exit_at_end); + fprintf(stderr, " Kelvin is wondering when we remove orig out edge\n"); + kdn_dump_all_blocks(stderr, loop); +#endif if (opt_info) { apply_opt_in_copies (opt_info, max_unroll, true, true); @@ -1053,7 +1333,17 @@ unroll_loop_runtime_iterations (struct loop *loop) /* Remove the edges. */ FOR_EACH_VEC_ELT (remove_edges, i, e) +#ifdef KELVIN_PATCH + { +#ifdef KELVIN_NOISE + fprintf(stderr, "Removing an edge from the loop\n"); + kdn_dump_edge(stderr, e, TRUE, TRUE); +#endif + remove_path (e); + } +#else remove_path (e); +#endif /* We must be careful when updating the number of iterations due to preconditioning and the fact that the value must be valid at entry @@ -1081,6 +1371,27 @@ unroll_loop_runtime_iterations (struct loop *loop) loop->any_estimate = false; } +#ifdef KELVIN_NOISE + fprintf(stderr, "\nAt bottom of unroll_loop_runtime_iterations(), "); + fprintf(stderr, "having unrolled %d times\n", max_unroll); + fprintf(stderr, " Loop has %d instructions, ", num_loop_insns(loop)); + fprintf(stderr, " distributed across %d basic blocks.\n", loop->num_nodes); + kdn_dump_loop(stderr, loop); + + body = get_loop_body(loop); + + /* kdn: body is a newly allocated (XNEWVEC) array of basic_block + * references. Note that basic_block is "struct basic_block_def *". + * The number of values held in the body array is given by + * loop->num_nodes. + */ + for (unsigned int _i_ = 0; _i_ < loop->num_nodes; _i_++) { + fprintf(stderr, "body block[%d]:\n", _i_); + kdn_dump_block(stderr, body[_i_]); + } + free(body); +#endif + if (dump_file) fprintf (dump_file, ";; Unrolled loop %d times, counting # of iterations " |