summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>2018-10-09 10:54:04 +0000
committerNemanja Ivanovic <nemanja.i.ibm@gmail.com>2018-10-09 10:54:04 +0000
commit254326308d8c3ebbd8a9ec41d505f51e6b8e4399 (patch)
tree315914bc1a0de039e7f59a860fb06176c62959a3
parent4d82f5afb7072c46be93d3237a03481d77ab7213 (diff)
There are occasionally instances where AADB (the aggressive anti-dependency breaker) rewrites registers in such a way that a reg-reg copy becomes a self-copy. Such an instruction is obviously redundant and can be removed. This patch does precisely that. Note that this will not remove the various nops that we insert (which are themselves just self-copies). The reason those are left alone is that all of them have their own opcodes (that just encode to a self-copy). What prompted this patch is the fact that these self-copies sometimes end up using registers that make the instruction a priority-setting nop, thereby having a significant effect on performance. Differential revision: https://reviews.llvm.org/D52432
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp24
-rw-r--r--llvm/test/CodeGen/PowerPC/remove-self-copies.mir128
3 files changed, 162 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 42e43334622..8a062daab55 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -192,6 +192,16 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ static bool isSameClassPhysRegCopy(unsigned Opcode) {
+   // True only for the opcodes listed here as same-class register copies.
+   switch (Opcode) {
+   case PPC::OR: case PPC::OR8: case PPC::FMR: case PPC::VOR: case PPC::XXLOR:
+   case PPC::XXLORf: case PPC::XSCPSGNDP: case PPC::MCRF: case PPC::QVFMR:
+   case PPC::QVFMRs: case PPC::QVFMRb: case PPC::CROR: case PPC::EVOR:
+     return true;
+   }
+   return false;
+ }
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 55da0a295aa..3078a6610fe 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -34,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumberOfSelfCopies,
+ "Number of self copy instructions eliminated");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -65,6 +67,28 @@ namespace {
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+ // Detect self copies - these can result from running AADB.
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.getNumOperands() == 3 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ else if (MCID.getNumOperands() == 2 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ }
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
diff --git a/llvm/test/CodeGen/PowerPC/remove-self-copies.mir b/llvm/test/CodeGen/PowerPC/remove-self-copies.mir
new file mode 100644
index 00000000000..7180d74cb50
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/remove-self-copies.mir
@@ -0,0 +1,128 @@
+# RUN: llc -start-before ppc-pre-emit-peephole \
+# RUN: -verify-machineinstrs -ppc-asm-full-reg-names %s -o - | FileCheck %s
+--- |
+ ; ModuleID = 't.ll'
+ source_filename = "t.ll"
+ target datalayout = "e-m:e-i64:64-n32:64"
+
+ define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) {
+ entry:
+ %cmp = icmp slt i32 %a, %b
+ br i1 %cmp, label %return, label %if.end
+
+ if.end: ; preds = %entry
+ %cmp1 = icmp slt i32 %b, %a
+ br i1 %cmp1, label %return, label %if.end3
+
+ if.end3: ; preds = %if.end
+ %cmp4 = icmp eq i32 %a, %c
+ br i1 %cmp4, label %if.then5, label %if.end6
+
+ if.then5: ; preds = %if.end3
+ %add = shl nsw i32 %a, 1
+ br label %return
+
+ if.end6: ; preds = %if.end3
+ %cmp7 = icmp sgt i32 %c, %b
+ %add11 = add i32 %c, %b
+ %add12 = select i1 %cmp7, i32 %a, i32 0
+ %spec.select = add i32 %add11, %add12
+ ret i32 %spec.select
+
+ return: ; preds = %if.then5, %if.end, %entry
+ %retval.0 = phi i32 [ %add, %if.then5 ], [ %c, %entry ], [ %b, %if.end ]
+ ret i32 %retval.0
+ }
+
+...
+---
+name: test
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+registers:
+liveins:
+ - { reg: '$x3', virtual-reg: '' }
+ - { reg: '$x4', virtual-reg: '' }
+ - { reg: '$x5', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ successors: %bb.4(0x40000000), %bb.1(0x40000000)
+ liveins: $x3, $x4, $x5
+
+ renamable $cr0 = CMPW renamable $r3, renamable $r4
+ BCC 12, renamable $cr0, %bb.4
+
+ bb.1.if.end:
+ successors: %bb.6(0x40000000), %bb.2(0x40000000)
+ liveins: $cr0, $x3, $x4, $x5
+
+ BCC 36, killed renamable $cr0, %bb.2
+
+ bb.6:
+ liveins: $x4
+
+ ; CHECK: mr r5, r4
+ ; CHECK-NOT: mr r5, r5
+ ; CHECK: extsw r3, r5
+ $r5 = OR killed $r4, $r4, implicit $x4, implicit-def $x5
+ $r5 = OR $r5, $r5, implicit-def $x5
+ renamable $x3 = EXTSW_32_64 killed renamable $r5, implicit $x5
+ BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+
+ bb.2.if.end3:
+ successors: %bb.3(0x40000000), %bb.5(0x40000000)
+ liveins: $x3, $x4, $x5
+
+ renamable $cr0 = CMPLW renamable $r3, renamable $r5
+ BCC 68, killed renamable $cr0, %bb.5
+
+ bb.3.if.then5:
+ successors: %bb.4(0x80000000)
+ liveins: $x3
+
+ renamable $r5 = RLWINM killed renamable $r3, 1, 0, 30, implicit $x3, implicit-def $x5
+
+ bb.4.return:
+ liveins: $x5
+
+ renamable $x3 = EXTSW_32_64 killed renamable $r5, implicit $x5
+ BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+
+ bb.5.if.end6:
+ liveins: $x3, $x4, $x5
+
+ renamable $cr0 = CMPW renamable $r5, renamable $r4
+ renamable $r6 = LI 0
+ renamable $r4 = ADD4 killed renamable $r5, killed renamable $r4, implicit $x4, implicit $x5
+ renamable $r3 = ISEL killed renamable $r3, killed renamable $r6, killed renamable $cr0gt, implicit $cr0, implicit $x3
+ renamable $r3 = ADD4 killed renamable $r4, killed renamable $r3
+ renamable $x3 = EXTSW_32_64 killed renamable $r3
+ BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+
+...