diff options
Diffstat (limited to 'libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c')
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c new file mode 100644 index 00000000000..6a52f746dcb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c @@ -0,0 +1,223 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> +#include <openacc.h> + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 2 - cuBLAS creates, OpenACC shares. */ + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + e = cudaGetDevice (&dev); + if (e != cudaSuccess) + { + fprintf (stderr, "cudaGetDevice failed: %d\n", e); + exit (EXIT_FAILURE); + } + + acc_set_device_num (dev, acc_device_nvidia); + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + +#pragma acc parallel present (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + context_check (pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&ctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (!ctx) + { + fprintf (stderr, "Expected context\n"); + exit (EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf (stderr, "Unexpected new context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} |