diff options
author | Tobias Grosser <grosser@fim.uni-passau.de> | 2008-07-10 18:52:43 +0000 |
---|---|---|
committer | Tobias Grosser <grosser@fim.uni-passau.de> | 2008-07-10 18:52:43 +0000 |
commit | 3dd6b64960569b73deee4928277faee7a7fd48e5 (patch) | |
tree | 7ba9c2cfdb79bb71237f50c7dc2142c88d4bc59f | |
parent | 0c140a38790f0a21c24fdcce8d471f2776ebd816 (diff) |
2008-07-10 Tobias Grosser <grosser@fim.uni-passau.de>
* graphite.c (graphite_trans_bb_swap_loops): Rename from
graphite_swap_loops.
(graphite_trans_bb_move_loop): New.
(graphite_trans_bb_strip_mine): Rename from graphite_strip_mine_loop.
(graphite_trans_bb_block): New.
(graphite_trans_loop_block): New.
(graphite_trans_scop_swap_1and2): Rename from graphite_trans_swap_1and2.
(graphite_trans_scop_strip): Rename from graphite_trans_strip.
(graphite_trans_scop_block): New.
(graphite_apply_transformations): Rename from graphite_transformations.
* testsuite/gcc.dg/graphite/scop-matmult.c: New.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/graphite@137700 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.graphite | 15 | ||||
-rw-r--r-- | gcc/graphite.c | 331 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/graphite/scop-matmult.c | 60 |
3 files changed, 385 insertions, 21 deletions
diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite index c28cc2b088c..278d79484dc 100644 --- a/gcc/ChangeLog.graphite +++ b/gcc/ChangeLog.graphite @@ -1,5 +1,20 @@ 2008-07-10 Tobias Grosser <grosser@fim.uni-passau.de> + * graphite.c (graphite_trans_bb_swap_loops): Rename from + graphite_swap_loops. + (graphite_trans_bb_move_loop): New. + (graphite_trans_bb_strip_mine): Rename from graphite_strip_mine_loop. + (graphite_trans_bb_block): New. + (graphite_trans_loop_block): New. + (graphite_trans_scop_swap_1and2): Rename from graphite_trans_swap_1and2. + (graphite_trans_scop_strip): Rename from graphite_trans_strip. + (graphite_trans_scop_block): New. + (graphite_apply_transformations): Rename from graphite_transformations. + + * testsuite/gcc.dg/graphite/scop-matmult.c: New. + +2008-07-10 Tobias Grosser <grosser@fim.uni-passau.de> + * graphite.c (gbb_compare): New. (graphite_sort_gbbs): New. (gbb_can_be_ignored): New. diff --git a/gcc/graphite.c b/gcc/graphite.c index a2bf683cf96..157fe5372ad 100644 --- a/gcc/graphite.c +++ b/gcc/graphite.c @@ -3392,7 +3392,7 @@ gbb_can_be_ignored (graphite_bb_p gb) return true; } -/* Remove all bbs with empty body from SCOP. */ +/* Remove all ignoreable gbbs from SCOP. */ static void scop_remove_ignoreable_gbbs (scop_p scop) @@ -3436,27 +3436,125 @@ scop_remove_ignoreable_gbbs (scop_p scop) graphite_sort_gbbs (scop); } -/* Swap the two loops of GBB at index LOOP_ONE and LOOP_TWO. We do not edit the - static schedule. */ +/******************************************************************************* + Graphite BB transformations +*******************************************************************************/ + +/* Swap the two loops of GBB at index LOOP_ONE and LOOP_TWO. + This transformartion is only valid, if the loop nest between i and k is + perfect nested. Therefore we do not need to change the static schedule. + + Example: + + for (i = 0; i < 50; i++) + for (j ...) + for (k = 5; k < 100; k++) + A + + To swap i and k call: + + graphite_trans_bb_swap_loops (gb, 0, 2) + + for (k = 5; k < 100; k++) + for (j ...) + for (i = 0; i < 50; i++) + A + + XXX: Does not check validity. Necessary? */ static void -graphite_swap_loops (graphite_bb_p gb, unsigned loop_one, unsigned loop_two) +graphite_trans_bb_swap_loops (graphite_bb_p gb, unsigned loop_one, +unsigned loop_two) { unsigned i; loop_p loop_p_one, loop_p_two; CloogMatrix *domain = GBB_DOMAIN (gb); + gcc_assert (loop_one < gbb_nb_loops (gb)); gcc_assert (loop_two < gbb_nb_loops (gb)); - + + /* Move domain columns. */ for (i = 0; i < domain->NbRows; i++) value_swap (domain->p[i][loop_one + 1], domain->p[i][loop_two + 1]); + /* Update LOOPS vector. */ loop_p_one = VEC_index (loop_p, GBB_LOOPS (gb), loop_one); loop_p_two = VEC_index (loop_p, GBB_LOOPS (gb), loop_two); VEC_replace (loop_p, GBB_LOOPS(gb), loop_one, loop_p_two); VEC_replace (loop_p, GBB_LOOPS(gb), loop_two, loop_p_one); } +/* Move the loop at index LOOP and insert it before index NEW_LOOP_POS. + This transformartion is only valid, if the loop nest between i and k is + perfect nested. Therefore we do not need to change the static schedule. + + Example: + + for (i = 0; i < 50; i++) + for (j ...) + for (k = 5; k < 100; k++) + A + + To move k before i use: + + graphite_trans_bb_move_loop (A, 2, 0) + + for (k = 5; k < 100; k++) + for (i = 0; i < 50; i++) + for (j ...) + A + + And to move k back: + + graphite_trans_bb_move_loop (A, 0, 2) + + XXX: Does not check validity. Necessary? */ + +static void +graphite_trans_bb_move_loop (graphite_bb_p gb, unsigned loop, +unsigned new_loop_pos) +{ + CloogMatrix *domain = GBB_DOMAIN (gb); + unsigned row, j; + loop_p tmp_loop_p; + + gcc_assert (loop < gbb_nb_loops (gb)); + gcc_assert (new_loop_pos < gbb_nb_loops (gb)); + + /* Update LOOPS vector. */ + tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop); + VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop); + VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p); + + /* Move the domain columns. */ + if (loop < new_loop_pos) + for (row = 0; row < domain->NbRows; row++) + { + Value tmp; + value_init (tmp); + value_assign (tmp, domain->p[row][loop + 1]); + + for (j = loop ; j < new_loop_pos - 1; j++) + value_assign (domain->p[row][j + 1], domain->p[row][j + 2]); + + value_assign (domain->p[row][new_loop_pos], tmp); + value_clear (tmp); + } + else + for (row = 0; row < domain->NbRows; row++) + { + Value tmp; + value_init (tmp); + value_assign (tmp, domain->p[row][loop + 1]); + + for (j = loop ; j > new_loop_pos; j--) + value_assign (domain->p[row][j + 1], domain->p[row][j]); + + value_assign (domain->p[row][new_loop_pos + 1], tmp); + value_clear (tmp); + } +} + /* Strip mines the loop of BB at the position (loop depth) LOOP with STRIDE. Example: @@ -3468,10 +3566,10 @@ graphite_swap_loops (graphite_bb_p gb, unsigned loop_one, unsigned loop_two) 1 0 # i >= 0 -1 20 # i <= 20 - Strip mining with stripe stride = 2 + Strip mining with stripe stride = 4 - for (ii = 0; ii <= 20; i+=2) - for (i = ii; i <= min (20, ii+1); i++) + for (ii = 0; ii <= 20; i+=4) + for (i = ii; i <= min (20, ii+2); i++) A ii i ii2 1 @@ -3480,10 +3578,12 @@ graphite_swap_loops (graphite_bb_p gb, unsigned loop_one, unsigned loop_two) -1 1 0 0 # i >= ii 1 -1 0 1 # i <= ii + stride - 1 1 0 -2 0 # i <= stride - -1 0 2 0 # i >= stride */ + -1 0 2 0 # i >= stride + + Always valid, but not always a performance improvement. */ static void -graphite_strip_mine_loop (graphite_bb_p gb, unsigned loop, unsigned stride) +graphite_trans_bb_strip_mine (graphite_bb_p gb, unsigned loop, unsigned stride) { unsigned row, col; @@ -3499,7 +3599,8 @@ graphite_strip_mine_loop (graphite_bb_p gb, unsigned loop, unsigned stride) gcc_assert (loop <= gbb_nb_loops (gb) - 1); GBB_DOMAIN (gb) = new_domain; - + + /* Move domain. */ for (row = 0; row < domain->NbRows; row++) for (col = 0; col < domain->NbColumns; col++) if (col <= loop) @@ -3569,30 +3670,94 @@ graphite_strip_mine_loop (graphite_bb_p gb, unsigned loop, unsigned stride) GBB_STATIC_SCHEDULE (gb) = new_schedule; - /* XXX: Free old schedule. */ + /* XXX: Free old schedule. How? */ } VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop, NULL); VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop + 2, NULL); } +/* Loop block the LOOPS innermost loops of BB with stride size STRIDE. + + Example + + for (i = 0; i <= 50; i++=4) + for (k = 0; k <= 100; k++=4) + for (l = 0; l <= 200; l++=4) + A + + To strip mine the two inner most loops with stride = 4 call: + + graphite_trans_bb_block (A, 4, 2) + + for (i = 0; i <= 50; i++) + for (kk = 0; kk <= 100; kk+=4) + for (ll = 0; ll <= 200; ll+=4) + for (k = kk; k <= min (100, kk + 3); k++) + for (l = ll; l <= min (200, ll + 3); l++) + A + + XXX: Does not check validity. Necessary? */ + +static void +graphite_trans_bb_block (graphite_bb_p gb, unsigned stride, unsigned loops) +{ + unsigned i; + unsigned nb_loops = gbb_nb_loops (gb); + unsigned start = nb_loops - loops; + + /* Strip mine loops. */ + for (i = 0; i < nb_loops - start; i++) + graphite_trans_bb_strip_mine (gb, start + 3 * i, stride); + + /* Interchange loops. */ + for (i = 1; i < nb_loops - start; i++) + graphite_trans_bb_move_loop (gb, start + 3 * i, start + i); +} + +/******************************************************************************* + Graphite LOOP transformations +*******************************************************************************/ + +/* Loop block a loop nest. Calculate the optimal stride size (TODO). */ + +static void +graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops) +{ + graphite_bb_p gb; + int i; + + /* TODO: - Calculate the stride size automatically. + - Only block, if there are sufficient iterations. */ + int stride_size = 64; + + for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++) + graphite_trans_bb_block (gb, stride_size, loops); +} + +/******************************************************************************* + Graphite SCOP transformations +*******************************************************************************/ + +/* SIMPLE EXAMPLE */ /* For all bb with two loops swap these two loops. */ static void -graphite_trans_swap_1and2 (scop_p scop) +graphite_trans_scop_swap_1and2 (scop_p scop) { graphite_bb_p gb; int i; for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) if (gbb_nb_loops (gb) == 2) - graphite_swap_loops (gb, 0, 1); + graphite_trans_bb_swap_loops (gb, 0, 1); } -/* Strip mine the innermost loops for all bbs. */ +/* SIMPLE EXAMPLE */ +/* Strip mine all innermost loops with stride = 4. */ static void -graphite_trans_strip (scop_p scop) +graphite_trans_scop_strip (scop_p scop) { graphite_bb_p gb; int i; @@ -3601,14 +3766,133 @@ graphite_trans_strip (scop_p scop) { int nb_loops = gbb_nb_loops (gb); if (nb_loops >= 1 && nb_loops == loop_depth) - graphite_strip_mine_loop (gb, gbb_nb_loops (gb) - 1, 4); + graphite_trans_bb_strip_mine (gb, gbb_nb_loops (gb) - 1, 4); + } +} + + +/* Loop block all bbs. + XXX: Does not contain dependency checks. */ + +static void +graphite_trans_scop_block (scop_p scop) +{ + graphite_bb_p gb; + int i, j; + int last_nb_loops; + int nb_loops; + bool perfect = true; + + VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3); + int max_schedule = scop_max_loop_depth (scop) + 1; + lambda_vector last_schedule = lambda_vector_new (max_schedule); + + /* Get the data of the first bb. */ + gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0); + last_nb_loops = gbb_nb_loops (gb); + lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, + last_nb_loops + 1); + VEC_safe_push (graphite_bb_p, heap, bbs, gb); + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + /* We did the first bb before. */ + if (i == 0) + continue; + + nb_loops = gbb_nb_loops (gb); + + /* If the number of loops is unchanged and only the last element of the + schedule changes, we stay in the loop nest. */ + if (nb_loops == last_nb_loops + && (last_schedule [nb_loops + 1] + != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1])) + { + VEC_safe_push (graphite_bb_p, heap, bbs, gb); + continue; + } + + /* Otherwise, we left the innermost loop. So check, if the last bb was in + a perfect loop nest and how many loops are contained in this perfect + loop nest. + + Count the number of zeros from the end of the schedule. They are the + number of surrounding loops. + + Example: + last_bb 2 3 2 0 0 0 0 3 + bb 2 4 0 + <------ j = 4 + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 1 + <-- j = 2 + + If there is no zero, there were other bbs in outer loops and the loop + nest is not perfect. */ + for (j = last_nb_loops - 1; j >= 0; j--) + { + if (last_schedule [j] != 0 + || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1)) + { + j--; + break; + } + } + + j++; + + /* Found perfect loop nest. */ + if (perfect && last_nb_loops - j > 0) + graphite_trans_loop_block (bbs, last_nb_loops - j); + + /* Check if we start with a new loop. + + Example: + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 0 1 0 + + Here we start with the loop "2 3 2 0 0 1" + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 0 1 + + But here not, so the loop nest can never be perfect. */ + + perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0); + + /* Update the last_bb infos. We do not do that for the bbs in the same + loop, as the data we use is not changed. */ + last_nb_loops = nb_loops; + lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, + nb_loops + 1); + VEC_truncate (graphite_bb_p, bbs, 0); + VEC_safe_push (graphite_bb_p, heap, bbs, gb); } + + /* Check if the last loop nest was perfect. It is the same check as above, + but the comparision with the next bb is missing. */ + for (j = last_nb_loops - 1; j >= 0; j--) + if (last_schedule [j] != 0) + { + j--; + break; + } + + j++; + + /* Found perfect loop nest. */ + if (last_nb_loops - j > 0) + graphite_trans_loop_block (bbs, last_nb_loops - j); + VEC_free (graphite_bb_p, heap, bbs); + } /* Apply graphite transformations. */ static void -graphite_transformations (scop_p scop) +graphite_apply_transformations (scop_p scop) { /* Sort the list of bbs. Keep them always sorted. */ graphite_sort_gbbs (scop); @@ -3621,10 +3905,15 @@ graphite_transformations (scop_p scop) may generate invalid code by default. */ if (0) - graphite_trans_swap_1and2 (scop); + graphite_trans_scop_swap_1and2 (scop); if (0) - graphite_trans_strip (scop); + graphite_trans_scop_strip (scop); + + /* Enabled by default to get tested. At the moment this is not save for all + scops, as we do not check dependencies. */ + if (1) + graphite_trans_scop_block (scop); } /* Perform a set of linear transforms on LOOPS. */ @@ -3686,7 +3975,7 @@ graphite_transform_loops (void) if (0) build_rdg_all_levels (scop); - graphite_transformations (scop); + graphite_apply_transformations (scop); gloog (scop, find_transform (scop)); } diff --git a/gcc/testsuite/gcc.dg/graphite/scop-matmult.c b/gcc/testsuite/gcc.dg/graphite/scop-matmult.c new file mode 100644 index 00000000000..31d8113481e --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-matmult.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ +#define FLOAT float + +int foo (void); + +/* Multiply two n x n matrices A and B and store the result in C. */ + +void matmult (FLOAT **A, FLOAT **B, FLOAT **C, int n) +{ + int i,j,k; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + for (k = 0; k < n; k++) + A[i][j] += B[i][k] * C[k][j]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ +#define FLOAT float + +int foo (void); + +/* Multiply two n x n matrices A and B and store the result in C. */ + +void matmult (FLOAT **A, FLOAT **B, FLOAT **C, int n) +{ + int i,j,k; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + for (k = 0; k < n; k++) + A[i][j] += B[i][k] * C[k][j]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ +#define FLOAT float + +int foo (void); + +/* Multiply two n x n matrices A and B and store the result in C. */ + +void matmult (FLOAT **A, FLOAT **B, FLOAT **C, int n) +{ + int i,j,k; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + for (k = 0; k < n; k++) + A[i][j] += B[i][k] * C[k][j]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ |