summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-02 23:15:42 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-02 23:15:42 +0000
commit1cb415f43ec27ed2b4b0d7edb44db0289c7c2585 (patch)
tree39171aa1ea32f1b5374653f358097e685da140c4 /llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
parent8d15686c55944ef1e102ceec06e31fad93e25da0 (diff)
AMDGPU: Distribute SGPR->VGPR copies of REG_SEQUENCE
Make the REG_SEQUENCE be a VGPR, and do the register class copy first.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp112
1 files changed, 89 insertions, 23 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index b6941c1cc75..89a1372fb13 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -85,18 +85,6 @@ class SIFixSGPRCopies : public MachineFunctionPass {
private:
static char ID;
- std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
- getCopyRegClasses(const MachineInstr &Copy,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) const;
-
- bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const SIRegisterInfo &TRI) const;
-
- bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const SIRegisterInfo &TRI) const;
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
@@ -134,10 +122,10 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
return false;
}
-std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
-SIFixSGPRCopies::getCopyRegClasses(const MachineInstr &Copy,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) const {
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getCopyRegClasses(const MachineInstr &Copy,
+ const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
unsigned DstReg = Copy.getOperand(0).getReg();
unsigned SrcReg = Copy.getOperand(1).getReg();
@@ -157,18 +145,94 @@ SIFixSGPRCopies::getCopyRegClasses(const MachineInstr &Copy,
return std::make_pair(SrcRC, DstRC);
}
-bool SIFixSGPRCopies::isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const SIRegisterInfo &TRI) const {
+static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}
-bool SIFixSGPRCopies::isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const SIRegisterInfo &TRI) const {
+static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
+// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
+//
+// SGPRx = ...
+// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+// VGPRz = COPY SGPRy
+//
+// ==>
+//
+// VGPRx = COPY SGPRx
+// VGPRz = REG_SEQUENCE VGPRx, sub0
+//
+// This exposes immediate folding opportunities when materializing 64-bit
+// immediates.
+static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI) {
+ assert(MI.isRegSequence());
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
+ return false;
+
+ if (!MRI.hasOneUse(DstReg))
+ return false;
+
+ MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
+ if (!CopyUse.isCopy())
+ return false;
+
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
+
+ if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
+ return false;
+
+ // TODO: Could have multiple extracts?
+ unsigned SubReg = CopyUse.getOperand(1).getSubReg();
+ if (SubReg != AMDGPU::NoSubRegister)
+ return false;
+
+ MRI.setRegClass(DstReg, DstRC);
+
+ // SGPRx = ...
+ // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+ // VGPRz = COPY SGPRy
+
+ // =>
+ // VGPRx = COPY SGPRx
+ // VGPRz = REG_SEQUENCE VGPRx, sub0
+
+ MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+
+ for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+ unsigned SrcReg = MI.getOperand(I).getReg();
+ unsigned SrcSubReg = MI.getOperand(I).getReg();
+
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ assert(TRI->isSGPRClass(SrcRC) &&
+ "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
+
+ SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
+ const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
+
+ unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
+ .addOperand(MI.getOperand(I));
+
+ MI.getOperand(I).setReg(TmpReg);
+ }
+
+ CopyUse.eraseFromParent();
+ return true;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI =
@@ -273,8 +337,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
case AMDGPU::REG_SEQUENCE: {
if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI))
+ !hasVGPROperands(MI, TRI)) {
+ foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
+ }
DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);