diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-02 23:15:42 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-02 23:15:42 +0000 |
commit | 1cb415f43ec27ed2b4b0d7edb44db0289c7c2585 (patch) | |
tree | 39171aa1ea32f1b5374653f358097e685da140c4 /llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | |
parent | 8d15686c55944ef1e102ceec06e31fad93e25da0 (diff) |
AMDGPU: Distribute SGPR->VGPR copies of REG_SEQUENCE
Make the REG_SEQUENCE be a VGPR, and do the register class
copy first.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 112 |
1 file changed, 89 insertions, 23 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index b6941c1cc75..89a1372fb13 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -85,18 +85,6 @@ class SIFixSGPRCopies : public MachineFunctionPass { private: static char ID; - std::pair<const TargetRegisterClass *, const TargetRegisterClass *> - getCopyRegClasses(const MachineInstr &Copy, - const SIRegisterInfo &TRI, - const MachineRegisterInfo &MRI) const; - - bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const SIRegisterInfo &TRI) const; - - bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const SIRegisterInfo &TRI) const; public: SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } @@ -134,10 +122,10 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { return false; } -std::pair<const TargetRegisterClass *, const TargetRegisterClass *> -SIFixSGPRCopies::getCopyRegClasses(const MachineInstr &Copy, - const SIRegisterInfo &TRI, - const MachineRegisterInfo &MRI) const { +static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> +getCopyRegClasses(const MachineInstr &Copy, + const SIRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { unsigned DstReg = Copy.getOperand(0).getReg(); unsigned SrcReg = Copy.getOperand(1).getReg(); @@ -157,18 +145,94 @@ SIFixSGPRCopies::getCopyRegClasses(const MachineInstr &Copy, return std::make_pair(SrcRC, DstRC); } -bool SIFixSGPRCopies::isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const SIRegisterInfo &TRI) const { +static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC); } -bool SIFixSGPRCopies::isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, - const 
TargetRegisterClass *DstRC, - const SIRegisterInfo &TRI) const { +static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); } +// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. +// +// SGPRx = ... +// SGPRy = REG_SEQUENCE SGPRx, sub0 ... +// VGPRz = COPY SGPRy +// +// ==> +// +// VGPRx = COPY SGPRx +// VGPRz = REG_SEQUENCE VGPRx, sub0 +// +// This exposes immediate folding opportunities when materializing 64-bit +// immediates. +static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII, + MachineRegisterInfo &MRI) { + assert(MI.isRegSequence()); + + unsigned DstReg = MI.getOperand(0).getReg(); + if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) + return false; + + if (!MRI.hasOneUse(DstReg)) + return false; + + MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg); + if (!CopyUse.isCopy()) + return false; + + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); + + if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) + return false; + + // TODO: Could have multiple extracts? + unsigned SubReg = CopyUse.getOperand(1).getSubReg(); + if (SubReg != AMDGPU::NoSubRegister) + return false; + + MRI.setRegClass(DstReg, DstRC); + + // SGPRx = ... + // SGPRy = REG_SEQUENCE SGPRx, sub0 ... 
+ // VGPRz = COPY SGPRy + + // => + // VGPRx = COPY SGPRx + // VGPRz = REG_SEQUENCE VGPRx, sub0 + + MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg()); + + for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { + unsigned SrcReg = MI.getOperand(I).getReg(); + // Read the subregister index with getSubReg(); using getReg() here was a + // copy-paste bug that fed a virtual register number into getSubRegClass(). + unsigned SrcSubReg = MI.getOperand(I).getSubReg(); + + const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); + assert(TRI->isSGPRClass(SrcRC) && + "Expected SGPR REG_SEQUENCE to only have SGPR inputs"); + + SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); + const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); + + unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC); + + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg) + .addOperand(MI.getOperand(I)); + + MI.getOperand(I).setReg(TmpReg); + } + + CopyUse.eraseFromParent(); + return true; +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo *TRI = @@ -273,8 +337,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } case AMDGPU::REG_SEQUENCE: { if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || - !hasVGPROperands(MI, TRI)) { + foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); continue; + } DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); |