aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXinliang David Li <davidxl@google.com>2017-05-30 21:22:18 +0000
committerXinliang David Li <davidxl@google.com>2017-05-30 21:22:18 +0000
commitcbd4ff93102479238e293f6ddb688df3c6e9ba47 (patch)
tree9de180c4929944119ac1f551c53321738ee4e501
parentc832920d4b92e8b280cc33dee8449101991c196e (diff)
[PartialInlining] Shrinkwrap allocas with live range contained in outline region.
Differential Revision: http://reviews.llvm.org/D33618 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304245 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Transforms/Utils/CodeExtractor.h13
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp83
-rw-r--r--test/Transforms/CodeExtractor/PartialInlineAlloca.ll68
-rw-r--r--test/Transforms/CodeExtractor/PartialInlineAlloca2.ll65
-rw-r--r--test/Transforms/CodeExtractor/PartialInlineAlloca4.ll67
-rw-r--r--test/Transforms/CodeExtractor/PartialInlineAlloca5.ll67
6 files changed, 355 insertions, 8 deletions
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index a602498e5f2..7e23544af1a 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -25,6 +25,7 @@ template <typename T> class ArrayRef;
class BranchProbabilityInfo;
class DominatorTree;
class Function;
+ class Instruction;
class Loop;
class Module;
class RegionNode;
@@ -103,7 +104,17 @@ template <typename T> class ArrayRef;
/// a code sequence, that sequence is modified, including changing these
/// sets, before extraction occurs. These modifications won't have any
/// significant impact on the cost however.
- void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const;
+ void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &Allocas) const;
+ /// Find the set of allocas whose live ranges are contained within the
+ /// outlined region.
+ ///
+ /// Allocas whose lifetime markers are contained in the outlined region
+ /// should be pushed to the outlined function. The address bitcasts that
+ /// are used by the lifetime markers are also candidates for shrink-
+ /// wrapping. The instructions that need to be sunk are collected in
+ /// 'Allocas'.
+ void findAllocas(ValueSet &Allocas) const;
private:
void severSplitPHINodes(BasicBlock *&Header);
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index ed72099ec3e..24d28a6c283 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
return false;
}
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
- ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const { // Collects into SinkCands the allocas (and the cast carrying their lifetime markers) that sit outside the region but are used only inside it.
+ Function *Func = (*Blocks.begin())->getParent(); // function that owns the first block of the region
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB)) // blocks belonging to the region are skipped: only instructions outside it are candidates
+ continue;
+ for (Instruction &II : BB) {
+ auto *AI = dyn_cast<AllocaInst>(&II);
+ if (!AI)
+ continue;
+
+ // Returns true if every user of Addr is inside the outlined region and
+ // both a lifetime_start and a lifetime_end marker are among those users.
+ auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+ Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+ for (User *U : Addr->users()) {
+ if (!definedInRegion(Blocks, U)) // a single user outside the region rejects Addr
+ return false;
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifeStart = IntrInst;
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ LifeEnd = IntrInst;
+ }
+ }
+ return LifeStart && LifeEnd;
+ };
+
+ if (GetLifeTimeMarkers(AI)) { // case 1: the markers use the alloca directly
+ SinkCands.insert(AI);
+ continue;
+ }
+
+ // Case 2: the markers may hang off a pointer cast of the alloca; look for one.
+ Instruction *MarkerAddr = nullptr;
+ for (User *U : AI->users()) {
+ if (U->stripPointerCasts() == AI) { // U strips back to AI, i.e. it is a cast of the alloca
+ Instruction *Bitcast = cast<Instruction>(U);
+ if (GetLifeTimeMarkers(Bitcast)) {
+ MarkerAddr = Bitcast; // NOTE(review): overwrites any earlier match, so only the last qualifying cast is recorded
+ continue; // accepted as the marker address; skip the in-region test below
+ }
+ }
+ if (!definedInRegion(Blocks, U)) { // any other user outside the region disqualifies the alloca
+ MarkerAddr = nullptr;
+ break;
+ }
+ }
+ if (MarkerAddr) {
+ if (!definedInRegion(Blocks, MarkerAddr)) // a cast already inside the region is not added
+ SinkCands.insert(MarkerAddr);
+ SinkCands.insert(AI);
+ }
+ }
+ }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &SinkCands) const {
+
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI)
- if (definedInCaller(Blocks, *OI))
- Inputs.insert(*OI);
+ ++OI) {
+ Value *V = *OI;
+ if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ Inputs.insert(V);
+ }
for (User *U : II.users())
if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
- ValueSet inputs, outputs;
+ ValueSet inputs, outputs, SinkingCands;
// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() {
"newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+ findAllocas(SinkingCands);
+
// Find inputs to, outputs from the code region.
- findInputsOutputs(inputs, outputs);
+ findInputsOutputs(inputs, outputs, SinkingCands);
+
+ // Now sink all candidate instructions collected by findAllocas into newFuncRoot.
+ for (auto *II : SinkingCands)
+ cast<Instruction>(II)->moveBefore(*newFuncRoot,
+ newFuncRoot->getFirstInsertionPt());
// Calculate the exit blocks for the extracted region and the total exit
// weights for each of those blocks.
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
new file mode 100644
index 00000000000..48db0b61a31
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca.ll
@@ -0,0 +1,68 @@
+
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+ ; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @callee_sinkable_bitcast(i32 %arg) local_unnamed_addr #0 { ; positive case: lifetime markers hang off a bitcast of the alloca
+; CHECK-LABEL:define{{.*}}@callee_sinkable_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK-NEXT: bitcast
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca %"class.base", align 4 ; its only users are the cast %tmp1 below and the cast %tmp11 in bb5
+ %tmp1 = bitcast %"class.base"* %tmp to i8* ; marker address: used only by the two lifetime calls in bb5
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+ %tmp11 = bitcast %"class.base"* %tmp to i32*
+ store i32 %tmp3, i32* %tmp11, align 4, !tbaa !2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ ret i32 %tmp7
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_sinkable_bitcast(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
new file mode 100644
index 00000000000..4ca418389e5
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca2.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_no_bitcast(i32 %arg) local_unnamed_addr #0 { ; positive case: lifetime markers use the alloca directly, no marker cast involved
+; CHECK-LABEL:define{{.*}}@callee_no_bitcast.{{[0-9]}}
+; CHECK: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i8, align 4 ; all of its users (both lifetime calls and the cast %tmp11) are in bb5
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ %tmp11 = bitcast i8 * %tmp to i32*
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_no_bitcast(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
new file mode 100644
index 00000000000..6bb38d44f46
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca4.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use1(i32 %arg) local_unnamed_addr #0 { ; negative case: the alloca has an extra use outside bb5
+; CHECK-LABEL:define{{.*}}@callee_unknown_use1.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i8, align 4 ; used by the lifetime calls in bb5 and also by %tmp1 in bb6
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) #2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ %tmp11 = bitcast i8* %tmp to i32*
+ call void @bar(i32* nonnull %tmp11) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ %tmp1 = bitcast i8* %tmp to i32* ; otherwise-unused cast of %tmp outside bb5; presumably the use that keeps the alloca from being sunk
+ ret i32 %tmp7
+}
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_unknown_use1(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
new file mode 100644
index 00000000000..9c53496e1ce
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAlloca5.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -skip-partial-inlining-cost-analysis -S | FileCheck %s
+
+%"class.base" = type { %"struct.base"* }
+%"struct.base" = type opaque
+
+@g = external local_unnamed_addr global i32, align 4
+
+define i32 @callee_unknown_use2(i32 %arg) local_unnamed_addr #0 { ; negative case: the marker cast has an extra use outside bb5
+; CHECK-LABEL:define{{.*}}@callee_unknown_use2.{{[0-9]}}
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.lifetime
+bb:
+ %tmp = alloca i32, align 4
+ %tmp1 = bitcast i32* %tmp to i8* ; marker address: used by the lifetime calls in bb5 and also by %tmp10 in bb6
+ %tmp2 = load i32, i32* @g, align 4, !tbaa !2
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = icmp slt i32 %arg, 0
+ br i1 %tmp4, label %bb6, label %bb5
+
+bb5: ; preds = %bb
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) #2
+ store i32 %tmp3, i32* %tmp, align 4, !tbaa !2
+ store i32 %tmp3, i32* @g, align 4, !tbaa !2
+ call void @bar(i32* nonnull %tmp) #2
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) #2
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb
+ %tmp7 = phi i32 [ 1, %bb5 ], [ 0, %bb ]
+ %tmp10 = bitcast i8* %tmp1 to i32* ; otherwise-unused cast of %tmp1 outside bb5; presumably the use that keeps the cast and the alloca from being sunk
+ ret i32 %tmp7
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(i32*) local_unnamed_addr #2
+declare void @bar2(i32*, i32*) local_unnamed_addr #1
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+ %tmp = tail call i32 @callee_unknown_use2(i32 %arg)
+ ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable}
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 303574)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
+
+