aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack Kirk <jack.kirk@codeplay.com>2022-08-05 11:41:47 -0700
committerArtem Belevich <tra@google.com>2022-08-05 12:14:06 -0700
commit3e0e5568a6a8c744d26f79a1e55360fe2655867c (patch)
tree93f06b3b63f25a3f3df1f8277491dcbfcc83e079
parent9a9848f4b95895ad97d3dc117f0a94773dc1607f (diff)
[CUDA] Fixed sm version constrain for __bmma_m8n8k128_mma_and_popc_b1.
As stated in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-mma: ".and operation in single-bit wmma requires sm_80 or higher." tra@: Fixed a bug in builtins-nvptx-mma.py test generator and regenerated the tests. Differential Revision: https://reviews.llvm.org/D131265
-rw-r--r--clang/include/clang/Basic/BuiltinsNVPTX.def2
-rw-r--r--clang/test/CodeGen/builtins-nvptx-mma.cu20
-rw-r--r--clang/test/CodeGen/builtins-nvptx-mma.py4
3 files changed, 13 insertions, 13 deletions
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index a5ec77a6112c..ea0efcef2ca5 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -853,7 +853,7 @@ TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX
TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
-TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX71))
+TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
diff --git a/clang/test/CodeGen/builtins-nvptx-mma.cu b/clang/test/CodeGen/builtins-nvptx-mma.cu
index aaa44bcaa7e2..5375d88032b7 100644
--- a/clang/test/CodeGen/builtins-nvptx-mma.cu
+++ b/clang/test/CodeGen/builtins-nvptx-mma.cu
@@ -10,7 +10,7 @@
// RUN: -fcuda-is-device -target-feature +ptx71 \
// RUN: -DPTX=71 -DSM=80 \
// RUN: -S -emit-llvm -o - -x cuda %s \
-// RUN: | FileCheck -check-prefixes=CHECK_PTX70_SM80,CHECK_PTX60_SM70,CHECK_PTX63_SM72,CHECK_PTX61_SM70,CHECK_PTX63_SM75,CHECK_PTX71_SM75 %s
+// RUN: | FileCheck -check-prefixes=CHECK_PTX70_SM80,CHECK_PTX60_SM70,CHECK_PTX63_SM72,CHECK_PTX61_SM70,CHECK_PTX63_SM75,CHECK_PTX71_SM80 %s
// Verify that all builtins have correct constraints.
// RUN: %clang_cc1 -triple nvptx-unknown-unknown \
// RUN: -target-cpu sm_60 -target-feature +ptx42 \
@@ -167,7 +167,7 @@ __device__ void test_wmma_buitins(int *src, int *dst,
// CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
// expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
-#endif // (PTX >= 60) && (SM >= 70)
+#endif // (PTX >= 60) && (SM >= 70)
#if (PTX >= 61) && (SM >= 70)
@@ -435,7 +435,7 @@ __device__ void test_wmma_buitins(int *src, int *dst,
// CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite
// expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
-#endif // (PTX >= 61) && (SM >= 70)
+#endif // (PTX >= 61) && (SM >= 70)
#if (PTX >= 63) && (SM >= 72)
@@ -691,7 +691,7 @@ __device__ void test_wmma_buitins(int *src, int *dst,
// CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.u8.satfinite
// expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}}
__imma_m8n32k16_mma_u8(dst, src, src, src, 0, 1);
-#endif // (PTX >= 63) && (SM >= 72)
+#endif // (PTX >= 63) && (SM >= 72)
#if (PTX >= 63) && (SM >= 75)
@@ -752,7 +752,7 @@ __device__ void test_wmma_buitins(int *src, int *dst,
// CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4.satfinite
// expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}}
__imma_m8n8k32_mma_u4(dst, src, src, src, 1, 1);
-#endif // (PTX >= 63) && (SM >= 75)
+#endif // (PTX >= 63) && (SM >= 75)
#if (PTX >= 70) && (SM >= 80)
@@ -900,12 +900,12 @@ __device__ void test_wmma_buitins(int *src, int *dst,
// CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.mma.row.row.f64
// expected-error-re@+1 {{'__dmma_m8n8k4_mma_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}}
__dmma_m8n8k4_mma_f64(ddst, dsrc, dsrc, dsrc, 0, 0);
-#endif // (PTX >= 70) && (SM >= 80)
+#endif // (PTX >= 70) && (SM >= 80)
-#if (PTX >= 71) && (SM >= 75)
+#if (PTX >= 71) && (SM >= 80)
- // CHECK_PTX71_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.mma.and.popc.row.col.b1
- // expected-error-re@+1 {{'__bmma_m8n8k128_mma_and_popc_b1' needs target feature (sm_75{{.*}},(ptx71{{.*}}}}
+ // CHECK_PTX71_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k128.mma.and.popc.row.col.b1
+ // expected-error-re@+1 {{'__bmma_m8n8k128_mma_and_popc_b1' needs target feature (sm_80{{.*}},(ptx71{{.*}}}}
__bmma_m8n8k128_mma_and_popc_b1(dst, src, src, src, 1);
-#endif // (PTX >= 71) && (SM >= 75)
+#endif // (PTX >= 71) && (SM >= 80)
}
diff --git a/clang/test/CodeGen/builtins-nvptx-mma.py b/clang/test/CodeGen/builtins-nvptx-mma.py
index 6c0991002027..baadc7e1a742 100644
--- a/clang/test/CodeGen/builtins-nvptx-mma.py
+++ b/clang/test/CodeGen/builtins-nvptx-mma.py
@@ -202,7 +202,7 @@ def get_required_sm(frag, b1op=""):
if frag.ptx_type in ["f64", "bf16", "tf32"]:
return 80
if frag.ptx_type in ["u4", "s4", "b1"]:
- if b1op == "_and_popc":
+ if b1op == ".and.popc":
return 80
return 75
if frag.ptx_type in ["s8", "u8"]:
@@ -409,7 +409,7 @@ __device__ void test_wmma_buitins(int *src, int *dst,
print()
print("#if (PTX >= %d) && (SM >= %d)" % (ptx, sm))
print(tests)
- print("#endif // (PTX >= %d) && (SM >= %d) "% (ptx, sm))
+ print("#endif // (PTX >= %d) && (SM >= %d)"% (ptx, sm))
print("}")