diff options
Diffstat (limited to 'libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c')
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c | 953 |
1 files changed, 953 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
new file mode 100644
index 00000000000..53f03d17bb2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
@@ -0,0 +1,953 @@
+#include <assert.h>
+#include <openacc.h>
+
+typedef struct {  /* Two-member aggregate; used as the private variable in
+                     local_w_3, local_w_4 and loop_w_6.  */
+  int x, y;
+} vec2;
+
+typedef struct {  /* Larger aggregate with an embedded array; used as the
+                     private variable in loop_g_6.  */
+  int x, y, z;
+  int attr[13];
+} vec3_attr;
+
+
+/* Test of gang-private variables declared in local scope with parallel
+   directive.  arr starts out as all 3s; the second gang loop adds x to
+   arr[i], and the final loop expects arr[i] == 3 + i * 2.  */
+
+void local_g_1()
+{
+  int i, arr[32];
+
+  for (i = 0; i < 32; i++)
+    arr[i] = 3;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    int x;  /* Declared inside the parallel region.  */
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < 32; i++)
+      x = i * 2;
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < 32; i++)
+      {
+        if (acc_on_device (acc_device_host))  /* NOTE(review): presumably
+             needed because, when run on the host, x would otherwise still
+             hold the value left by the loop above -- confirm.  */
+          x = i * 2;
+        arr[i] += x;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == 3 + i * 2);
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode. Back-to-back worker loops. 
*/
+
+void local_w_1()
+{
+  int i, arr[32 * 32 * 32];  /* One element per (i, j, k) triple.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            int x = i ^ j * 3;  /* Parses as i ^ (j * 3).  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            int x = i | j * 5;  /* Parses as i | (j * 5).  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;  /* Same flattening as inside
+                                               the parallel region.  */
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Successive vector loops: x is reassigned
+   between the two vector loops of a single worker-loop iteration.  */
+
+void local_w_2()
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            int x = i ^ j * 3;  /* Value read by the first vector loop.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+
+            x = i | j * 5;  /* Value read by the second vector loop.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode. Aggregate worker variable. 
*/
+
+void local_w_3()
+{
+  int i, arr[32 * 32 * 32];  /* One element per (i, j, k) triple.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            vec2 pt;  /* Both members are set before the first vector
+                         loop; pt.x is read by the first loop and pt.y by
+                         the second.  */
+
+            pt.x = i ^ j * 3;
+            pt.y = i | j * 5;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt.y * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Addressable worker variable: pt is accessed
+   both directly and through the pointer ptp.  */
+
+void local_w_4()
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            vec2 pt, *ptp;
+
+            ptp = &pt;
+
+            pt.x = i ^ j * 3;  /* Written directly, read via ptp below.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+            ptp->y = i | j * 5;  /* Written via ptp, read directly below.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt.y * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode. Array worker variable. 
*/
+
+void local_w_5()
+{
+  int i, arr[32 * 32 * 32];  /* One element per (i, j, k) triple.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            int pt[2];  /* pt[0] feeds the first vector loop, pt[1] the
+                           second.  */
+
+            pt[0] = i ^ j * 3;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+            pt[1] = i | j * 5;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt[1] * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of gang-private variables declared on loop directive.  x is
+   assigned i * 2 before it is added to arr[i] (initially i), so the final
+   loop expects i * 3.  */
+
+void loop_g_1()
+{
+  int x = 5, i, arr[32];  /* The initializer 5 never shows up in the
+                             expected results; x is reassigned before
+                             every read.  */
+
+  for (i = 0; i < 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        x = i * 2;
+        arr[i] += x;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == i * 3);
+}
+
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned workers.  x is assigned i * 2 in the gang loop and read in
+   the nested worker loop.  */
+
+void loop_g_2()
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        x = i * 2;
+
+        #pragma acc loop worker
+        for (int j = 0; j < 32; j++)
+          arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* i / 32 recovers the gang loop index.  */
+    assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned vectors. 
*/
+
+void loop_g_3()
+{
+  int x = 5, i, arr[32 * 32];  /* The initializer 5 never shows up in the
+                                  expected results; x is reassigned before
+                                  every read.  */
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        x = i * 2;
+
+        #pragma acc loop vector
+        for (int j = 0; j < 32; j++)
+          arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* i / 32 recovers the gang loop index.  */
+    assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private addressable variable declared on loop directive, with
+   broadcasting to partitioned workers.  The decrement through p comes
+   after the worker loop, and x is reassigned at the top of every gang
+   iteration; the expected values are the same as in loop_g_2.  */
+
+void loop_g_4()
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        int *p = &x;  /* Takes the address of the private variable.  */
+
+        x = i * 2;
+
+        #pragma acc loop worker
+        for (int j = 0; j < 32; j++)
+          arr[i * 32 + j] += x;
+
+        (*p)--;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private array variable declared on loop directive, with
+   broadcasting to partitioned workers.  x is refilled with x[j] == j * 2
+   on every gang iteration, and element i * 32 + j gains x[j % 8].  */
+
+void loop_g_5()
+{
+  int x[8], i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        for (int j = 0; j < 8; j++)
+          x[j] = j * 2;
+
+        #pragma acc loop worker
+        for (int j = 0; j < 32; j++)
+          arr[i * 32 + j] += x[j % 8];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* i % 8 equals j % 8 for index
+                                    i * 32 + j, since 32 is a multiple
+                                    of 8.  */
+    assert (arr[i] == i + (i % 8) * 2);
+}
+
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+   broadcasting to partitioned workers. 
*/
+
+void loop_g_6()
+{
+  int i, arr[32 * 32];
+  vec3_attr pt;  /* Named in private() on the gang loop below.  */
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(pt)
+    for (i = 0; i < 32; i++)
+      {
+        pt.x = i;
+        pt.y = i * 2;
+        pt.z = i * 4;
+        pt.attr[5] = i * 6;  /* Only element 5 of attr is written and
+                                read.  */
+
+        #pragma acc loop worker
+        for (int j = 0; j < 32; j++)
+          arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* 13 == 1 + 2 + 4 + 6, the sum of the
+                                    multipliers assigned above.  */
+    assert (arr[i] == i + (i / 32) * 13);
+}
+
+
+/* Test of vector-private variables declared on loop directive.  x is
+   declared at function scope and named in private() on each of the two
+   vector loops.  */
+
+void loop_v_1()
+{
+  int x, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+
+            #pragma acc loop vector private(x)
+            for (k = 0; k < 32; k++)
+              {
+                x = i ^ j * 3;  /* Parses as i ^ (j * 3).  */
+                arr[i * 1024 + j * 32 + k] += x * k;
+              }
+
+            #pragma acc loop vector private(x)
+            for (k = 0; k < 32; k++)
+              {
+                x = i | j * 5;  /* Parses as i | (j * 5).  */
+                arr[i * 1024 + j * 32 + k] += x * k;
+              }
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of vector-private variables declared on loop directive. Array type. 
*/
+
+void loop_v_2()
+{
+  int pt[2], i, arr[32 * 32 * 32];  /* pt is named in private() on the
+                                       vector loop below.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+
+            #pragma acc loop vector private(pt)
+            for (k = 0; k < 32; k++)
+              {
+                pt[0] = i ^ j * 3;
+                pt[1] = i | j * 5;
+                arr[i * 1024 + j * 32 + k] += pt[0] * k;
+                arr[i * 1024 + j * 32 + k] += pt[1] * k;
+              }
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive.  x is
+   declared at function scope and named in private() on the worker
+   loop.  */
+
+void loop_w_1()
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            x = i ^ j * 3;
+            /* Try to ensure that accesses to 'x' don't get optimized into
+               a temporary.  */
+            __asm__ __volatile__ ("");
+            arr[i * 32 + j] += x;
+          }
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* i / 32 and i % 32 recover the gang and
+                                    worker loop indices.  */
+    assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode. 
*/
+
+void loop_w_2()
+{
+  int x = 5, i, arr[32 * 32 * 32];  /* The initializer 5 never shows up in
+                                       the expected results; x is
+                                       reassigned before every read.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            x = i ^ j * 3;  /* Parses as i ^ (j * 3).  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Back-to-back worker loops, each naming x in
+   private().  */
+
+void loop_w_3()
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            x = i ^ j * 3;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            x = i | j * 5;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode. Successive vector loops. 
*/
+
+void loop_w_4()
+{
+  int x = 5, i, arr[32 * 32 * 32];  /* The initializer 5 never shows up in
+                                       the expected results; x is
+                                       reassigned before every read.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            x = i ^ j * 3;  /* Value read by the first vector loop.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+
+            x = i | j * 5;  /* Value read by the second vector loop.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Addressable worker variable: x is written
+   both directly and through the pointer p.  */
+
+void loop_w_5()
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+            int *p = &x;  /* Takes the address of the private variable.  */
+
+            x = i ^ j * 3;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+
+            *p = i | j * 5;  /* Written via p, read as x below.  */
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += x * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode. Aggregate worker variable. 
*/
+
+void loop_w_6()
+{
+  int i, arr[32 * 32 * 32];
+  vec2 pt;  /* Named in private() on the worker loop below.  */
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(pt)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+
+            pt.x = i ^ j * 3;
+            pt.y = i | j * 5;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt.y * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of worker-private variables declared on a loop directive,
+   broadcasting to vector-partitioned mode.  Array worker variable: pt[0]
+   feeds the first vector loop and pt[1] the second.  */
+
+void loop_w_7()
+{
+  int i, arr[32 * 32 * 32];
+  int pt[2];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  /* "pt" is treated as "present_or_copy" on the parallel directive because it
+     is an array variable.  */
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        /* But here, it is made private per-worker.  */
+        #pragma acc loop worker private(pt)
+        for (j = 0; j < 32; j++)
+          {
+            int k;
+
+            pt[0] = i ^ j * 3;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+            pt[1] = i | j * 5;
+
+            #pragma acc loop vector
+            for (k = 0; k < 32; k++)
+              arr[i * 1024 + j * 32 + k] += pt[1] * k;
+          }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+          int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+        }
+}
+
+
+/* Test of gang-private variables declared on the parallel directive. 
*/
+
+void parallel_g_1()
+{
+  int x = 5, i, arr[32];  /* x is named in private() on the parallel
+                             directive below.  */
+
+  for (i = 0; i < 32; i++)
+    arr[i] = 3;
+
+  #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < 32; i++)
+      x = i * 2;
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < 32; i++)
+      {
+        if (acc_on_device (acc_device_host))  /* NOTE(review): presumably
+             needed because, when run on the host, x would otherwise still
+             hold the value left by the loop above -- confirm.  */
+          x = i * 2;
+        arr[i] += x;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == 3 + i * 2);
+}
+
+
+/* Test of gang-private array variable declared on the parallel directive.
+   x is filled in ascending order (x[j] == j * 2) and read in descending
+   order (x[31 - j]).  */
+
+void parallel_g_2()
+{
+  int x[32], i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32)
+  {
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        int j;
+        for (j = 0; j < 32; j++)
+          x[j] = j * 2;
+
+        #pragma acc loop worker
+        for (j = 0; j < 32; j++)
+          arr[i * 32 + j] += x[31 - j];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)  /* i % 32 recovers the worker loop
+                                    index j.  */
+    assert (arr[i] == i + (31 - (i % 32)) * 2);
+}
+
+
+int main ()  /* Runs every test in turn; a failed check aborts through
+                assert.  */
+{
+  local_g_1();
+  local_w_1();
+  local_w_2();
+  local_w_3();
+  local_w_4();
+  local_w_5();
+  loop_g_1();
+  loop_g_2();
+  loop_g_3();
+  loop_g_4();
+  loop_g_5();
+  loop_g_6();
+  loop_v_1();
+  loop_v_2();
+  loop_w_1();
+  loop_w_2();
+  loop_w_3();
+  loop_w_4();
+  loop_w_5();
+  loop_w_6();
+  loop_w_7();
+  parallel_g_1();
+  parallel_g_2();
+
+  return 0;
+} |