author     Daniel Sanders <daniel_l_sanders@apple.com>    2018-10-04 18:44:58 +0000
committer  Daniel Sanders <daniel_l_sanders@apple.com>    2018-10-04 18:44:58 +0000
commit     e24429ba59aed6160602a97f4d5832181fecc36a
tree       d68a6eefb2b73348a344149f2487e9cd462345eb
parent     f0215d0b2c25a63677a63ffa26df8865d03bc9be
[globalisel][combine] Improve the truncate placement for the extending-loads combine
This brings the extending-loads patch back to its original intent, minus the
PHI bug, and adds another small improvement that de-dupes truncates inserted
into the same block.
Truncates are sunk to their uses, unless that would require inserting before a
phi, in which case the truncate is sunk to the _beginning_ of the predecessor
block for that path (but no earlier than the def).
The beginning of the predecessor is chosen because it keeps de-duping multiple
truncates in the same block simple, and optimized code will go through a
scheduler at some point anyway, which will likely change the position.
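As a rough illustration (hypothetical MIR with made-up register names, mirroring
the new sink_to_phi_nondominating test below), a sign-extended load whose small
value feeds a G_PHI through two predecessors is now rewritten like this:

    ; before the combine
    bb.0.entry:
      %v:_(s8)  = G_LOAD %addr(p0) :: (load 1)
      %w:_(s32) = G_SEXT %v(s8)
    bb.1.if:
      %a:_(s8) = G_ADD %v, %k
    bb.2.else:
      %b:_(s8) = G_SUB %v, %k
    bb.3.exit:
      %p:_(s8) = G_PHI %a(s8), %bb.1, %b(s8), %bb.2

    ; after the combine: the load becomes a G_SEXTLOAD and one G_TRUNC is
    ; placed at the beginning of each predecessor rather than in front of the
    ; G_PHI (truncates landing in the same block are de-duped)
    bb.0.entry:
      %w:_(s32) = G_SEXTLOAD %addr(p0) :: (load 1)
    bb.1.if:
      %t1:_(s8) = G_TRUNC %w(s32)
      %a:_(s8)  = G_ADD %t1, %k
    bb.2.else:
      %t2:_(s8) = G_TRUNC %w(s32)
      %b:_(s8)  = G_SUB %t2, %k
    bb.3.exit:
      %p:_(s8) = G_PHI %a(s8), %bb.1, %b(s8), %bb.2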
3 files changed, 131 insertions, 33 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 30e0da319e8..06f0ea54373 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -105,6 +105,37 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
   }
   return CurrentUse;
 }
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+    std::function<void(MachineBasicBlock::iterator)> Inserter) {
+  MachineInstr &UseMI = *UseMO.getParent();
+
+  MachineBasicBlock *InsertBB = UseMI.getParent();
+
+  // If the use is a PHI then we want the predecessor block instead.
+  if (UseMI.isPHI()) {
+    MachineOperand *PredBB = std::next(&UseMO);
+    InsertBB = PredBB->getMBB();
+  }
+
+  // If the block is the same block as the def then we want to insert just after
+  // the def instead of at the start of the block.
+  if (InsertBB == DefMI.getParent()) {
+    MachineBasicBlock::iterator InsertPt = &DefMI;
+    Inserter(std::next(InsertPt));
+    return;
+  }
+
+  // Otherwise we want the start of the BB
+  Inserter(InsertBB->getFirstNonPHI());
+}
 } // end anonymous namespace
 
 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
@@ -153,22 +184,6 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
   // type since by definition the result of an extend is larger.
   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
 
-  // Rewrite the load and schedule the canonical use for erasure.
-  const auto TruncateUse = [&MI](MachineIRBuilder &Builder,
-                                 MachineOperand &UseMO, unsigned DstReg,
-                                 unsigned SrcReg) {
-    MachineInstr &UseMI = *UseMO.getParent();
-    MachineBasicBlock &UseMBB = *UseMI.getParent();
-
-    Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
-
-    if (UseMI.isPHI())
-      Builder.setInsertPt(*MI.getParent(),
-                          std::next(MachineBasicBlock::iterator(MI)));
-
-    Builder.buildTrunc(DstReg, SrcReg);
-  };
-
   // Rewrite the load to the chosen extending load.
   unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
   MI.setDesc(
@@ -180,7 +195,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
 
   // Rewrite all the uses to fix up the types.
   SmallVector<MachineInstr *, 1> ScheduleForErase;
-  SmallVector<std::pair<MachineOperand*, unsigned>, 4> ScheduleForAssignReg;
+  SmallVector<std::pair<MachineOperand *, MachineInstr *>, 4> ScheduleForInsert;
 
   for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
     MachineInstr *UseMI = UseMO.getParent();
@@ -204,7 +219,6 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
         //    ... = ... %2(s32)
         MRI.replaceRegWith(UseDstReg, ChosenDstReg);
         ScheduleForErase.push_back(UseMO.getParent());
-        Observer.erasedInstr(*UseMO.getParent());
       } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
         // If the preferred size is smaller, then keep the extend but extend
        // from the result of the extending load. For example:
@@ -229,9 +243,11 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
         //    %4:_(s8) = G_TRUNC %2:_(s32)
         //    %3:_(s64) = G_ZEXT %2:_(s8)
         //    ... = ... %3(s64)
-        unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-        TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-        ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+        InsertInsnsWithoutSideEffectsBeforeUse(
+            Builder, MI, UseMO,
+            [&](MachineBasicBlock::iterator InsertBefore) {
+              ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+            });
       }
       continue;
     }
@@ -239,20 +255,40 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
       // We're going to update the load to def this value later so just erase
       // the old extend.
       ScheduleForErase.push_back(UseMO.getParent());
-      Observer.erasedInstr(*UseMO.getParent());
       continue;
     }
 
     // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
-    unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-    TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-    ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+    InsertInsnsWithoutSideEffectsBeforeUse(
+        Builder, MI, UseMO, [&](MachineBasicBlock::iterator InsertBefore) {
+          ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+        });
   }
-  for (auto &Assignment : ScheduleForAssignReg)
-    Assignment.first->setReg(Assignment.second);
-  for (auto &EraseMI : ScheduleForErase)
+
+  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+  for (auto &InsertionInfo : ScheduleForInsert) {
+    MachineOperand *UseMO = InsertionInfo.first;
+    MachineInstr *InsertBefore = InsertionInfo.second;
+
+    MachineInstr *PreviouslyEmitted =
+        EmittedInsns.lookup(InsertBefore->getParent());
+    if (PreviouslyEmitted) {
+      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+      continue;
+    }
+
+    Builder.setInsertPt(*InsertBefore->getParent(), InsertBefore);
+    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+    EmittedInsns[InsertBefore->getParent()] = NewMI;
+    UseMO->setReg(NewDstReg);
+    Observer.createdInstr(*NewMI);
+  }
+  for (auto &EraseMI : ScheduleForErase) {
     EraseMI->eraseFromParent();
+    Observer.erasedInstr(*EraseMI);
+  }
 
   MI.getOperand(0).setReg(ChosenDstReg);
   return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 7b9243492f5..aaa4bfb054a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -3,6 +3,7 @@
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64--"
+
   define void @multiple_copies(i8* %addr) {
   entry:
     br i1 0, label %if, label %else
@@ -14,7 +15,7 @@
     ret void
   }
 
-  define void @sink_to_phi(i8* %addr) {
+  define void @sink_to_phi_trivially_dominating(i8* %addr) {
   entry:
     br i1 0, label %if, label %exit
   if:
@@ -22,6 +23,18 @@
   exit:
     ret void
   }
+
+  define void @sink_to_phi_nondominating(i8* %addr) {
+  entry:
+    br i1 0, label %if, label %else
+  if:
+    br label %exit
+  else:
+    br label %exit
+  exit:
+    ret void
+  }
+
 ...
 ---
@@ -70,8 +83,8 @@ body: |
 ...
 ---
-name: sink_to_phi
-# CHECK-LABEL: name: sink_to_phi
+name: sink_to_phi_trivially_dominating
+# CHECK-LABEL: name: sink_to_phi_trivially_dominating
 # This test currently tests that we don't sink if we would sink to a phi. This
 # is needed to avoid inserting into the middle of the leading G_PHI instructions
 # of a BB
@@ -109,3 +122,53 @@ body: |
     $w0 = COPY %3
     $w1 = COPY %9
 ...
+
+---
+name: sink_to_phi_nondominating
+# CHECK-LABEL: name: sink_to_phi_nondominating
+# This test currently tests that we don't sink if we would sink to a phi. This
+# is needed to avoid inserting into the middle of the leading G_PHI instructions
+# of a BB
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $w1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%)
+    ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %3:_(s32) = G_SEXT %2
+    %4:_(s32) = G_CONSTANT i32 1
+    %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_
+    G_BRCOND %5:_(s1), %bb.1
+    G_BR %bb.2.else
+  bb.1.if:
+    ; CHECK: bb.1.if:
+    successors: %bb.3(0x80000000)
+    %10:_(s8) = G_CONSTANT i8 1
+    ; CHECK: [[T1:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+    %6:_(s8) = G_ADD %2, %10
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_ADD [[T1]], {{.*}}
+    G_BR %bb.3.exit
+  bb.2.else:
+    ; CHECK: bb.2.else:
+    successors: %bb.3(0x80000000)
+    %11:_(s8) = G_CONSTANT i8 1
+    ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
+    %7:_(s8) = G_SUB %2, %11
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+    G_BR %bb.3.exit
+  bb.3.exit:
+    ; CHECK: bb.3.exit:
+    %8:_(s8) = G_PHI %6:_(s8), %bb.1, %7:_(s8), %bb.2
+    ; CHECK: [[T5:%[0-9]+]]:_(s8) = G_PHI [[T2]](s8), %bb.1, [[T4]](s8)
+    %9:_(s32) = G_ZEXT %8
+    ; CHECK: [[T6:%[0-9]+]]:_(s32) = G_ZEXT [[T5]](s8)
+    ; CHECK: $w0 = COPY [[T0]](s32)
+    ; CHECK: $w1 = COPY [[T6]](s32)
+    $w0 = COPY %3
+    $w1 = COPY %9
+...
+
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
index 5f1d9142ea8..47d7156789f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
@@ -288,8 +288,7 @@ body: |
     ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
     ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
     ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
-    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
-    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T4]]
+    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
     ; CHECK: $w0 = COPY [[T3]](s32)
     ; CHECK: $x1 = COPY [[T1]](s64)
     ; CHECK: $w2 = COPY [[T5]](s32)