diff options
author | Sebastian Pop <sebastian.pop@amd.com> | 2008-04-08 15:32:38 +0000 |
---|---|---|
committer | Sebastian Pop <sebastian.pop@amd.com> | 2008-04-08 15:32:38 +0000 |
commit | 3a945881f80200113b62376f5c649977e7fa7989 (patch) | |
tree | f77ee842e23070132c201cfc38d936f1f7e89734 | |
parent | d5bbffba03df338dab0fc539e860f767032c2d14 (diff) |
Modified ldist-*.c cases as in trunk.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/graphite@134096 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c | 4 |
11 files changed, 95 insertions, 14 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c index 8d965789f2c..43c10466525 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c @@ -20,15 +20,19 @@ void foo (int * __restrict__ ia, oyb[i] = myb[i] >> 10; } - /* This loop should be distributed, and the result should look like - this: + /* This loop was distributed, but it is not anymore due to the cost + model changes: the result of a distribution would look like this: + | for (i=0; i < 52; i++) | oya[i] = ia[i] * oxa[i] + ib[i] * oxb[i] >> 10; | | for (i=0; i < 52; i++) | oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10; + + and in this the array IA is read in both tasks. For maximizing + the cache reuse, ldist does not distributes this loop anymore. */ } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c index 58c78cb2396..0790c18a9da 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c @@ -13,10 +13,12 @@ int loop1 (int k) } /* Dependences: S1->S2 (flow, level 1) + + One partition as A is used in both S1 and S2. */ return a[1000-2] + b[1000-1] + c[1000-2]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c new file mode 100644 index 00000000000..88651e7b72d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */ + +void foo (int * __restrict__ ia, + int * __restrict__ ib, + int * __restrict__ oxa, + int * __restrict__ oxb, + int * __restrict__ oya, + int * __restrict__ oyb) +{ + int i; + long int mya[52]; + long int myb[52]; + + for (i=0; i < 52; i++) + { + mya[i] = ia[i] * oxa[i] + ib[i] * oxb[i]; + myb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i]; + oya[i] = 0; + oyb[i] = myb[i] >> 10; + } + + /* This loop should be distributed, and the result should look like + this: + | memset (oya, 0, 208); + | for (i=0; i < 52; i++) + | oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10; + */ +} + +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c new file mode 100644 index 00000000000..1e555fe26ad --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */ + +int foo (int * __restrict__ ia, + int * __restrict__ ib, + int * __restrict__ oxa, + int * __restrict__ oxb) +{ + int i; + int oya[52], oyb[52]; + + for (i=0; i < 52; i++) + { + oya[i] = (ia[i] * oxa[i]) >> 10; + oyb[i] = (ib[i] * oxb[i]) >> 10; + } + + return oya[22] + oyb[21]; +} + +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c index d412318571e..623aacfdbf5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c @@ -18,5 +18,5 @@ int foo (int * __restrict__ ia, return oya[22] + oyb[21]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ -/* { dg- final { cleanup-tree-dump "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c index f10c63e55e4..de98ccc4c30 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c @@ -13,15 +13,19 @@ void foo (int * __restrict__ a, b[i] = a[i - 1] + 1; } - /* This loop should be distributed, and the result should look like - this: + /* This loop is not distributed because the cost of spliting it: + | for (i=1; i < N; i++) | a[i] += c[i]; | | for (i=1; i < N; i++) | b[i] = a[i - 1] + 1; + + is higher due to data in array A that is written and then read in + another task. The cost model should forbid the transformation in + this case. */ } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c index 23e3f1e9c50..524fb4542b8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c @@ -23,9 +23,12 @@ int loop1 (int k) S3 -> S4 (flow, level 1) There are three partitions: {S1, S3}, {S2} and {S4}. + + The cost model should fuse together all the partitions, as they + are reusing the same data, ending on a single partition. */ return a[10000-2] + b[10000-1] + c[10000-2] + d[10000-2]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 3 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 3 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c index 2ca1399e407..a744fea020a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c @@ -20,5 +20,10 @@ int loop1 (int k) return b[100-1][0]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* We used to distribute also innermost loops, but these could produce + too much code in the outer loop, degrading performance of scalar + code. So this test was XFAILed because the cost model of the stand + alone distribution pass has evolved. Now it passes. */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" { target ilp32 } } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" { target lp64 } } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c index 573d7e89332..7a38c86832b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c @@ -26,10 +26,13 @@ int loop1 (int k) - S3 (i) has to be executed before S1 (i+1), as a[i+1] has to execute before the update to a[i], {S4} is the consumer partition: it consumes the values from array "c" produced in S3. + + The cost model should fuse all the tasks together as the cost of + fetching data from caches is too high. */ return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c index 6217309f05b..4a8e0660061 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c @@ -22,10 +22,13 @@ int loop1 (int k) S4->S3 (flow, level 1) Two partitions: {S1, S2, S4} produce information that is consumed in {S3}. + + So that means that the current cost model will also fuse these + two partitions into a single one for avoiding cache misses. */ return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c index dcd8b0aa32d..ee8d023dee3 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c @@ -15,10 +15,12 @@ int loop1 (int k) S1->S2 (flow, level 1) S1->S1 (anti, level 1) S1->S1 (flow, level 1) + + One partition, because of the cost of cache misses. */ return a[1000-2] + b[1000-1]; } -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ |