summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Sanders <daniel_l_sanders@apple.com>2018-10-04 18:44:58 +0000
committerDaniel Sanders <daniel_l_sanders@apple.com>2018-10-04 18:44:58 +0000
commite24429ba59aed6160602a97f4d5832181fecc36a (patch)
treed68a6eefb2b73348a344149f2487e9cd462345eb
parentf0215d0b2c25a63677a63ffa26df8865d03bc9be (diff)
[globalisel][combine] Improve the truncate placement for the extending-loads combine
This brings the extending loads patch back to the original intent but minus the PHI bug and with another small improvement to de-dupe truncates that are inserted into the same block. The truncates are sunk to their uses unless this would require inserting before a phi in which case it sinks to the _beginning_ of the predecessor block for that path (but no earlier than the def). The reason for choosing the beginning of the predecessor is that it makes de-duping multiple truncates in the same block simple, and optimized code is going to run a scheduler at some point which will likely change the position anyway.
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp92
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir69
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir3
3 files changed, 131 insertions, 33 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 30e0da319e8..06f0ea54373 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -105,6 +105,37 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
}
return CurrentUse;
}
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+ MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+ std::function<void(MachineBasicBlock::iterator)> Inserter) {
+ MachineInstr &UseMI = *UseMO.getParent();
+
+ MachineBasicBlock *InsertBB = UseMI.getParent();
+
+ // If the use is a PHI then we want the predecessor block instead.
+ if (UseMI.isPHI()) {
+ MachineOperand *PredBB = std::next(&UseMO);
+ InsertBB = PredBB->getMBB();
+ }
+
+ // If the block is the same block as the def then we want to insert just after
+ // the def instead of at the start of the block.
+ if (InsertBB == DefMI.getParent()) {
+ MachineBasicBlock::iterator InsertPt = &DefMI;
+ Inserter(std::next(InsertPt));
+ return;
+ }
+
+  // Otherwise insert at the start of the block, after any leading PHIs.
+ Inserter(InsertBB->getFirstNonPHI());
+}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
@@ -153,22 +184,6 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// type since by definition the result of an extend is larger.
assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
- // Rewrite the load and schedule the canonical use for erasure.
- const auto TruncateUse = [&MI](MachineIRBuilder &Builder,
- MachineOperand &UseMO, unsigned DstReg,
- unsigned SrcReg) {
- MachineInstr &UseMI = *UseMO.getParent();
- MachineBasicBlock &UseMBB = *UseMI.getParent();
-
- Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
-
- if (UseMI.isPHI())
- Builder.setInsertPt(*MI.getParent(),
- std::next(MachineBasicBlock::iterator(MI)));
-
- Builder.buildTrunc(DstReg, SrcReg);
- };
-
// Rewrite the load to the chosen extending load.
unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
MI.setDesc(
@@ -180,7 +195,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// Rewrite all the uses to fix up the types.
SmallVector<MachineInstr *, 1> ScheduleForErase;
- SmallVector<std::pair<MachineOperand*, unsigned>, 4> ScheduleForAssignReg;
+ SmallVector<std::pair<MachineOperand *, MachineInstr *>, 4> ScheduleForInsert;
for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
MachineInstr *UseMI = UseMO.getParent();
@@ -204,7 +219,6 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// ... = ... %2(s32)
MRI.replaceRegWith(UseDstReg, ChosenDstReg);
ScheduleForErase.push_back(UseMO.getParent());
- Observer.erasedInstr(*UseMO.getParent());
} else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
// If the preferred size is smaller, then keep the extend but extend
// from the result of the extending load. For example:
@@ -229,9 +243,11 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// %4:_(s8) = G_TRUNC %2:_(s32)
// %3:_(s64) = G_ZEXT %2:_(s8)
// ... = ... %3(s64)
- unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
- TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
- ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+ });
}
continue;
}
@@ -239,20 +255,40 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// We're going to update the load to def this value later so just erase
// the old extend.
ScheduleForErase.push_back(UseMO.getParent());
- Observer.erasedInstr(*UseMO.getParent());
continue;
}
// The use isn't an extend. Truncate back to the type we originally loaded.
// This is free on many targets.
- unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
- TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
- ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO, [&](MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+ });
}
- for (auto &Assignment : ScheduleForAssignReg)
- Assignment.first->setReg(Assignment.second);
- for (auto &EraseMI : ScheduleForErase)
+
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ for (auto &InsertionInfo : ScheduleForInsert) {
+ MachineOperand *UseMO = InsertionInfo.first;
+ MachineInstr *InsertBefore = InsertionInfo.second;
+
+ MachineInstr *PreviouslyEmitted =
+ EmittedInsns.lookup(InsertBefore->getParent());
+ if (PreviouslyEmitted) {
+ UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+ continue;
+ }
+
+ Builder.setInsertPt(*InsertBefore->getParent(), InsertBefore);
+ unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertBefore->getParent()] = NewMI;
+ UseMO->setReg(NewDstReg);
+ Observer.createdInstr(*NewMI);
+ }
+ for (auto &EraseMI : ScheduleForErase) {
EraseMI->eraseFromParent();
+ Observer.erasedInstr(*EraseMI);
+ }
MI.getOperand(0).setReg(ChosenDstReg);
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 7b9243492f5..aaa4bfb054a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -3,6 +3,7 @@
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--"
+
define void @multiple_copies(i8* %addr) {
entry:
br i1 0, label %if, label %else
@@ -14,7 +15,7 @@
ret void
}
- define void @sink_to_phi(i8* %addr) {
+ define void @sink_to_phi_trivially_dominating(i8* %addr) {
entry:
br i1 0, label %if, label %exit
if:
@@ -22,6 +23,18 @@
exit:
ret void
}
+
+ define void @sink_to_phi_nondominating(i8* %addr) {
+ entry:
+ br i1 0, label %if, label %else
+ if:
+ br label %exit
+ else:
+ br label %exit
+ exit:
+ ret void
+ }
+
...
---
@@ -70,8 +83,8 @@ body: |
...
---
-name: sink_to_phi
-# CHECK-LABEL: name: sink_to_phi
+name: sink_to_phi_trivially_dominating
+# CHECK-LABEL: name: sink_to_phi_trivially_dominating
# This test currently tests that we don't sink if we would sink to a phi. This
# is needed to avoid inserting into the middle of the leading G_PHI instructions
# of a BB
@@ -109,3 +122,53 @@ body: |
$w0 = COPY %3
$w1 = COPY %9
...
+
+---
+name: sink_to_phi_nondominating
+# CHECK-LABEL: name: sink_to_phi_nondominating
+# This test checks the case where the G_TRUNC cannot simply be sunk to its use
+# because the use is a G_PHI: the truncate is instead duplicated at the start
+# of each predecessor block (bb.1.if and bb.2.else) feeding the PHI.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $w1
+ successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%)
+ ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD
+ %0:_(p0) = COPY $x0
+ %1:_(s32) = COPY $w1
+ %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+ %3:_(s32) = G_SEXT %2
+ %4:_(s32) = G_CONSTANT i32 1
+ %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_
+ G_BRCOND %5:_(s1), %bb.1
+ G_BR %bb.2.else
+ bb.1.if:
+ ; CHECK: bb.1.if:
+ successors: %bb.3(0x80000000)
+ %10:_(s8) = G_CONSTANT i8 1
+ ; CHECK: [[T1:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+ %6:_(s8) = G_ADD %2, %10
+ ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_ADD [[T1]], {{.*}}
+ G_BR %bb.3.exit
+ bb.2.else:
+ ; CHECK: bb.2.else:
+ successors: %bb.3(0x80000000)
+ %11:_(s8) = G_CONSTANT i8 1
+ ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+ %7:_(s8) = G_SUB %2, %11
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ G_BR %bb.3.exit
+ bb.3.exit:
+ ; CHECK: bb.3.exit:
+ %8:_(s8) = G_PHI %6:_(s8), %bb.1, %7:_(s8), %bb.2
+ ; CHECK: [[T5:%[0-9]+]]:_(s8) = G_PHI [[T2]](s8), %bb.1, [[T4]](s8)
+ %9:_(s32) = G_ZEXT %8
+ ; CHECK: [[T6:%[0-9]+]]:_(s32) = G_ZEXT [[T5]](s8)
+ ; CHECK: $w0 = COPY [[T0]](s32)
+ ; CHECK: $w1 = COPY [[T6]](s32)
+ $w0 = COPY %3
+ $w1 = COPY %9
+...
+
+
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
index 5f1d9142ea8..47d7156789f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
@@ -288,8 +288,7 @@ body: |
; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
- ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T4]]
+ ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
; CHECK: $w0 = COPY [[T3]](s32)
; CHECK: $x1 = COPY [[T1]](s64)
; CHECK: $w2 = COPY [[T5]](s32)