summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2018-10-08 17:49:29 +0000
committerTom Stellard <tstellar@redhat.com>2018-10-08 17:49:29 +0000
commita4790818ffb4964c671adb80ce7a7b7a098363a6 (patch)
tree88cb22d27bb920bf7e6a5a0f6277d8f869a750ba
parent125a52731ce604caabc379325f5cb498de0b9072 (diff)
AMDGPU/GlobalISel: Select amdgcn.cvt.pkrtz to 64-bit instructions
Summary: The 32-bit variants do not exist on VI+. Reviewers: arsenm Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D52958
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td5
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir7
2 files changed, 6 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index ba735390f67..59bb2a16e0f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -122,15 +122,14 @@ def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
}
def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
// FIXME: We can't re-use SelectionDAG patterns here because they match
// against a custom SDNode and we would need to create a generic machine
// instruction that is equivalent to the custom SDNode. This would also require
// us to custom legalize the intrinsic to the new generic machine instruction,
// but I can't get custom legalizing of intrinsic to work and I'm not sure if
// this is even supported yet.
-defm : GISelVop2IntrPat <
- int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>;
+def : GISelVop3Pat2ModsPat <
+ int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>;
defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir
index 852c20834e7..f1c19f124ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir
@@ -1,4 +1,5 @@
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
--- |
define void @cvt_pkrtz(i32 addrspace(1)* %global0) { ret void }
@@ -23,15 +24,15 @@ body: |
%3:vgpr(s64) = COPY $vgpr3_vgpr4
; cvt_pkrtz vs
- ; GCN: V_CVT_PKRTZ_F16_F32_e32 [[SGPR0]], [[VGPR0]]
+ ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[VGPR0]], 0, [[SGPR0]]
%4:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0
; cvt_pkrtz sv
- ; GCN: V_CVT_PKRTZ_F16_F32_e32 [[SGPR0]], [[VGPR0]]
+ ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[SGPR0]], 0, [[VGPR0]]
%5:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1
; cvt_pkrtz vv
- ; GCN: V_CVT_PKRTZ_F16_F32_e32 [[VGPR0]], [[VGPR1]]
+ ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[VGPR0]], 0, [[VGPR1]]
%6:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %2
%7:vgpr(s32) = G_BITCAST %4