summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-09-27 21:28:52 +0000
committerCraig Topper <craig.topper@intel.com>2018-09-27 21:28:52 +0000
commit745b5ffc593250c4efe8b0b0f450b8d6c2b55525 (patch)
tree1aaa79f53e84effa08d73b611607e9727c97894a
parent22589ff5d889fbde887a2f62a240248dacff694e (diff)
[ScalarizeMaskedMemIntrin] When expanding masked loads, start with the passthru value and insert each conditional load result over its element.
Previously we started with undef and did one final merge at the end with a select.
-rw-r--r--llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp34
-rw-r--r--llvm/test/CodeGen/X86/masked_memop.ll54
-rw-r--r--llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll17
3 files changed, 31 insertions, 74 deletions
diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 35580df42f1..b039cdd01d4 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -90,7 +90,7 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
// cond.load: ; preds = %0
// %3 = getelementptr i32* %1, i32 0
// %4 = load i32* %3
-// %5 = insertelement <16 x i32> undef, i32 %4, i32 0
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
// br label %else
//
// else: ; preds = %0, %cond.load
@@ -146,10 +146,8 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
if (isa<Constant>(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
@@ -161,15 +159,11 @@ static void scalarizeMaskedLoad(CallInst *CI) {
VResult =
Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
@@ -177,13 +171,6 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
@@ -200,7 +187,8 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
@@ -211,13 +199,15 @@ static void scalarizeMaskedLoad(CallInst *CI) {
OldBr->eraseFromParent();
PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 50da0ed68a5..04cf58f556e 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -12,50 +12,20 @@ define <1 x double> @loadv1(<1 x i64> %trigger, <1 x double>* %addr, <1 x double
; AVX-LABEL: loadv1:
; AVX: ## %bb.0:
; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: ## implicit-def: $xmm1
-; AVX-NEXT: je LBB0_1
-; AVX-NEXT: ## %bb.2: ## %else
-; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: jne LBB0_3
-; AVX-NEXT: LBB0_4: ## %else
-; AVX-NEXT: vmovaps %xmm1, %xmm0
-; AVX-NEXT: retq
-; AVX-NEXT: LBB0_1: ## %cond.load
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: testq %rdi, %rdi
-; AVX-NEXT: je LBB0_4
-; AVX-NEXT: LBB0_3: ## %else
-; AVX-NEXT: vmovaps %xmm0, %xmm1
-; AVX-NEXT: vmovaps %xmm1, %xmm0
+; AVX-NEXT: jne LBB0_2
+; AVX-NEXT: ## %bb.1: ## %cond.load
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: LBB0_2: ## %else
; AVX-NEXT: retq
;
-; AVX512F-LABEL: loadv1:
-; AVX512F: ## %bb.0:
-; AVX512F-NEXT: testq %rdi, %rdi
-; AVX512F-NEXT: ## implicit-def: $xmm1
-; AVX512F-NEXT: jne LBB0_2
-; AVX512F-NEXT: ## %bb.1: ## %cond.load
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512F-NEXT: LBB0_2: ## %else
-; AVX512F-NEXT: testq %rdi, %rdi
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: loadv1:
-; SKX: ## %bb.0:
-; SKX-NEXT: testq %rdi, %rdi
-; SKX-NEXT: ## implicit-def: $xmm1
-; SKX-NEXT: jne LBB0_2
-; SKX-NEXT: ## %bb.1: ## %cond.load
-; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; SKX-NEXT: LBB0_2: ## %else
-; SKX-NEXT: testq %rdi, %rdi
-; SKX-NEXT: sete %al
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; SKX-NEXT: retq
+; AVX512-LABEL: loadv1:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: testq %rdi, %rdi
+; AVX512-NEXT: jne LBB0_2
+; AVX512-NEXT: ## %bb.1: ## %cond.load
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512-NEXT: LBB0_2: ## %else
+; AVX512-NEXT: retq
%mask = icmp eq <1 x i64> %trigger, zeroinitializer
%res = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* %addr, i32 4, <1 x i1>%mask, <1 x double>%dst)
ret <1 x double> %res
diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
index cd575f8bfaa..96ebb470283 100644
--- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
+++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
@@ -9,10 +9,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %pass
; CHECK: cond.load:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP4]], i32 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
@@ -21,9 +21,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %pass
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP8]], i32 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -41,8 +40,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64>* %p, <2 x i64> %passthru)
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64>* %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
-; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -53,9 +51,8 @@ define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64>* %p, <2 x i64> %passthru)
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP4]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP3]], i32 1
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret