summaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
authorJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-30 09:23:14 +0000
committerJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-30 09:23:14 +0000
commitab1137e3f9cbf82ab0ec08f06bb039f0f4c6735d (patch)
treeee677aefdc4e2a22ca3395d6d556bde1bf43bc2f /openmp
parent477b14eb8cb3616d50911c3f840e504284fb1451 (diff)
[libomptarget-nvptx] Fix ancestor_thread_num and team_size (non-SPMD)
According to OpenMP 4.5, p250:12-14: If the requested nest level is outside the range of 0 and the nest level of the current thread, as returned by the omp_get_level routine, the routine returns -1. The SPMD code path will need a similar fix. Differential Revision: https://reviews.llvm.org/D51787
Diffstat (limited to 'openmp')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu12
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/test/parallel/level.c84
2 files changed, 85 insertions, 11 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index 352a4f2f0b4..ea9225db9b9 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -202,8 +202,10 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
"Expected SPMD mode only with uninitialized runtime.");
return level == 1 ? GetThreadIdInBlock() : 0;
}
- int rc = 0; // default at level 0
- if (level >= 0) {
+ int rc = -1;
+ if (level == 0) {
+ rc = 0;
+ } else if (level > 0) {
int totLevel = omp_get_level();
if (level <= totLevel) {
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
@@ -249,8 +251,10 @@ EXTERN int omp_get_team_size(int level) {
"Expected SPMD mode only with uninitialized runtime.");
return level == 1 ? GetNumberOfThreadsInBlock() : 1;
}
- int rc = 1; // default at level 0
- if (level >= 0) {
+ int rc = -1;
+ if (level == 0) {
+ rc = 1;
+ } else if (level > 0) {
int totLevel = omp_get_level();
if (level <= totLevel) {
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/level.c b/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/level.c
index aee95b33473..edb00e09e60 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/level.c
+++ b/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/level.c
@@ -8,17 +8,39 @@ const int NumThreads = 64;
int main(int argc, char *argv[]) {
int level = -1, activeLevel = -1;
+ // The expected value is -1, initialize to different value.
+ int ancestorTNumNeg = 1, teamSizeNeg = 1;
+ int ancestorTNum0 = -1, teamSize0 = -1;
+ // The expected value is -1, initialize to different value.
+ int ancestorTNum1 = 1, teamSize1 = 1;
int check1[MaxThreads];
int check2[MaxThreads];
+ int check3[MaxThreads];
+ int check4[MaxThreads];
for (int i = 0; i < MaxThreads; i++) {
- check1[i] = check2[i] = 0;
+ check1[i] = check2[i] = check3[i] = check4[i] = 0;
}
- #pragma omp target map(level, activeLevel, check1[:], check2[:])
+ #pragma omp target map(level, activeLevel, ancestorTNumNeg, teamSizeNeg) \
+ map(ancestorTNum0, teamSize0, ancestorTNum1, teamSize1) \
+ map(check1[:], check2[:], check3[:], check4[:])
{
level = omp_get_level();
activeLevel = omp_get_active_level();
+ // Expected to return -1.
+ ancestorTNumNeg = omp_get_ancestor_thread_num(-1);
+ teamSizeNeg = omp_get_team_size(-1);
+
+ // Expected to return 0 and 1.
+ ancestorTNum0 = omp_get_ancestor_thread_num(0);
+ teamSize0 = omp_get_team_size(0);
+
+ // Expected to return -1 because the requested level is larger than
+ // the nest level.
+ ancestorTNum1 = omp_get_ancestor_thread_num(1);
+ teamSize1 = omp_get_team_size(1);
+
// Expecting active parallel region.
#pragma omp parallel num_threads(NumThreads)
{
@@ -27,24 +49,52 @@ int main(int argc, char *argv[]) {
// passes if both API calls return wrong values.
check1[id] += omp_get_level() * 5 + omp_get_active_level();
+ // Expected to return 0 and 1.
+ check2[id] += omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0);
+ // Expected to return the current thread num.
+ check2[id] += (omp_get_ancestor_thread_num(1) - id);
+ // Exepcted to return the current number of threads.
+ check2[id] += 3 * omp_get_team_size(1);
+ // Expected to return -1, see above.
+ check2[id] += omp_get_ancestor_thread_num(2) + omp_get_team_size(2);
+
// Expecting serialized parallel region.
#pragma omp parallel
{
#pragma omp atomic
- check2[id] += omp_get_level() * 5 + omp_get_active_level();
+ check3[id] += omp_get_level() * 5 + omp_get_active_level();
+
+ // Expected to return 0 and 1.
+ int check4Inc = omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0);
+ // Expected to return the parent thread num.
+ check4Inc += (omp_get_ancestor_thread_num(1) - id);
+ // Exepcted to return the number of threads in the active parallel region.
+ check4Inc += 3 * omp_get_team_size(1);
+ // Exptected to return 0 and 1.
+ check4Inc += omp_get_ancestor_thread_num(2) + 3 * omp_get_team_size(2);
+ // Expected to return -1, see above.
+ check4Inc += omp_get_ancestor_thread_num(3) + omp_get_team_size(3);
+
+ #pragma omp atomic
+ check4[id] += check4Inc;
}
}
}
// CHECK: target: level = 0, activeLevel = 0
printf("target: level = %d, activeLevel = %d\n", level, activeLevel);
+ // CHECK: level = -1: ancestorTNum = -1, teamSize = -1
+ printf("level = -1: ancestorTNum = %d, teamSize = %d\n", ancestorTNumNeg, teamSizeNeg);
+ // CHECK: level = 0: ancestorTNum = 0, teamSize = 1
+ printf("level = 0: ancestorTNum = %d, teamSize = %d\n", ancestorTNum0, teamSize0);
+ // CHECK: level = 1: ancestorTNum = -1, teamSize = -1
+ printf("level = 1: ancestorTNum = %d, teamSize = %d\n", ancestorTNum1, teamSize1);
// CHECK-NOT: invalid
for (int i = 0; i < MaxThreads; i++) {
// Check active parallel region:
// omp_get_level() = 1, omp_get_active_level() = 1
const int Expected1 = 6;
-
if (i < NumThreads) {
if (check1[i] != Expected1) {
printf("invalid: check1[%d] should be %d, is %d\n", i, Expected1, check1[i]);
@@ -53,9 +103,8 @@ int main(int argc, char *argv[]) {
printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
}
- // Check serialized parallel region:
- // omp_get_level() = 2, omp_get_active_level() = 1
- const int Expected2 = 11;
+ // 5 * 1 + 3 * 64 - 1 - 1 (see above)
+ const int Expected2 = 195;
if (i < NumThreads) {
if (check2[i] != Expected2) {
printf("invalid: check2[%d] should be %d, is %d\n", i, Expected2, check2[i]);
@@ -63,6 +112,27 @@ int main(int argc, char *argv[]) {
} else if (check2[i] != 0) {
printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
}
+
+ // Check serialized parallel region:
+ // omp_get_level() = 2, omp_get_active_level() = 1
+ const int Expected3 = 11;
+ if (i < NumThreads) {
+ if (check3[i] != Expected3) {
+ printf("invalid: check3[%d] should be %d, is %d\n", i, Expected3, check3[i]);
+ }
+ } else if (check3[i] != 0) {
+ printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+ }
+
+ // 5 * 1 + 3 * 64 + 3 * 1 - 1 - 1 (see above)
+ const int Expected4 = 198;
+ if (i < NumThreads) {
+ if (check4[i] != Expected4) {
+ printf("invalid: check4[%d] should be %d, is %d\n", i, Expected4, check4[i]);
+ }
+ } else if (check4[i] != 0) {
+ printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]);
+ }
}
return 0;