aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sebastian Pop <sebastian.pop@amd.com> 2008-04-08 15:32:38 +0000
committer Sebastian Pop <sebastian.pop@amd.com> 2008-04-08 15:32:38 +0000
commit 3a945881f80200113b62376f5c649977e7fa7989 (patch)
tree f77ee842e23070132c201cfc38d936f1f7e89734
parent d5bbffba03df338dab0fc539e860f767032c2d14 (diff)
Modified ldist-*.c cases as in trunk.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/graphite@134096 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c 10
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c 4
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c 33
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c 22
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c 4
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c 10
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c 5
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c 7
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c 5
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c 5
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c 4
11 files changed, 95 insertions, 14 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c
index 8d965789f2c..43c10466525 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1.c
@@ -20,15 +20,19 @@ void foo (int * __restrict__ ia,
oyb[i] = myb[i] >> 10;
}
- /* This loop should be distributed, and the result should look like
- this:
+ /* This loop was distributed, but it is not anymore due to the cost
+ model changes: the result of a distribution would look like this:
+
| for (i=0; i < 52; i++)
| oya[i] = ia[i] * oxa[i] + ib[i] * oxb[i] >> 10;
|
| for (i=0; i < 52; i++)
| oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10;
+
+ and in this case the array IA is read in both tasks. To maximize
+ cache reuse, ldist does not distribute this loop anymore.
*/
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c
index 58c78cb2396..0790c18a9da 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-10.c
@@ -13,10 +13,12 @@ int loop1 (int k)
}
/* Dependences:
S1->S2 (flow, level 1)
+
+ One partition as A is used in both S1 and S2.
*/
return a[1000-2] + b[1000-1] + c[1000-2];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c
new file mode 100644
index 00000000000..88651e7b72d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-11.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
+
+void foo (int * __restrict__ ia,
+ int * __restrict__ ib,
+ int * __restrict__ oxa,
+ int * __restrict__ oxb,
+ int * __restrict__ oya,
+ int * __restrict__ oyb)
+{
+ int i;
+ long int mya[52];
+ long int myb[52];
+
+ for (i=0; i < 52; i++)
+ {
+ mya[i] = ia[i] * oxa[i] + ib[i] * oxb[i];
+ myb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i];
+ oya[i] = 0;
+ oyb[i] = myb[i] >> 10;
+ }
+
+ /* This loop should be distributed, and the result should look like
+ this:
+ | memset (oya, 0, 208);
+ | for (i=0; i < 52; i++)
+ | oyb[i] = -ia[i] * oxb[i] + ib[i] * oxa[i] >> 10;
+ */
+}
+
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c
new file mode 100644
index 00000000000..1e555fe26ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-12.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-all" } */
+
+int foo (int * __restrict__ ia,
+ int * __restrict__ ib,
+ int * __restrict__ oxa,
+ int * __restrict__ oxb)
+{
+ int i;
+ int oya[52], oyb[52];
+
+ for (i=0; i < 52; i++)
+ {
+ oya[i] = (ia[i] * oxa[i]) >> 10;
+ oyb[i] = (ib[i] * oxb[i]) >> 10;
+ }
+
+ return oya[22] + oyb[21];
+}
+
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c
index d412318571e..623aacfdbf5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-1a.c
@@ -18,5 +18,5 @@ int foo (int * __restrict__ ia,
return oya[22] + oyb[21];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
-/* { dg- final { cleanup-tree-dump "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c
index f10c63e55e4..de98ccc4c30 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-2.c
@@ -13,15 +13,19 @@ void foo (int * __restrict__ a,
b[i] = a[i - 1] + 1;
}
- /* This loop should be distributed, and the result should look like
- this:
+ /* This loop is not distributed because the cost of splitting it:
+
| for (i=1; i < N; i++)
| a[i] += c[i];
|
| for (i=1; i < N; i++)
| b[i] = a[i - 1] + 1;
+
+ is higher due to data in array A that is written and then read in
+ another task. The cost model should forbid the transformation in
+ this case.
*/
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c
index 23e3f1e9c50..524fb4542b8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-3.c
@@ -23,9 +23,12 @@ int loop1 (int k)
S3 -> S4 (flow, level 1)
There are three partitions: {S1, S3}, {S2} and {S4}.
+
+ The cost model should fuse all the partitions together, as they
+ reuse the same data, ending up with a single partition.
*/
return a[10000-2] + b[10000-1] + c[10000-2] + d[10000-2];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 3 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 3 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c
index 2ca1399e407..a744fea020a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c
@@ -20,5 +20,10 @@ int loop1 (int k)
return b[100-1][0];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* We used to also distribute innermost loops, but this could produce
+ too much code in the outer loop, degrading the performance of scalar
+ code. So this test was XFAILed because the cost model of the
+ standalone distribution pass has evolved. Now it passes. */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" { target ilp32 } } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" { target lp64 } } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
index 573d7e89332..7a38c86832b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-6.c
@@ -26,10 +26,13 @@ int loop1 (int k)
- S3 (i) has to be executed before S1 (i+1), as a[i+1] has to execute before the update to a[i],
{S4} is the consumer partition: it consumes the values from array "c" produced in S3.
+
+ The cost model should fuse all the tasks together as the cost of
+ fetching data from caches is too high.
*/
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c
index 6217309f05b..4a8e0660061 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-8.c
@@ -22,10 +22,13 @@ int loop1 (int k)
S4->S3 (flow, level 1)
Two partitions: {S1, S2, S4} produce information that is consumed in {S3}.
+
+ This means that the current cost model will also fuse these
+ two partitions into a single one to avoid cache misses.
*/
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c
index dcd8b0aa32d..ee8d023dee3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-9.c
@@ -15,10 +15,12 @@ int loop1 (int k)
S1->S2 (flow, level 1)
S1->S1 (anti, level 1)
S1->S1 (flow, level 1)
+
+ One partition, because of the cost of cache misses.
*/
return a[1000-2] + b[1000-1];
}
-/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */