summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp3
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll266
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll18
3 files changed, 188 insertions, 99 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e856a7e680..a07fffe9b98 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
+ // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i])) {
+ if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index 8405117090b..67717a54659 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -3,93 +3,185 @@
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION
define void @Test(i32) {
-; ALL-LABEL: @Test(
-; ALL-NEXT: entry:
-; ALL-NEXT: br label [[LOOP:%.*]]
-; ALL: loop:
-; ALL-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; ALL-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; ALL-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
-; ALL-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
-; ALL-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
-; ALL-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
-; ALL-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
-; ALL-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
-; ALL-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
-; ALL-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
-; ALL-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
-; ALL-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
-; ALL-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
-; ALL-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
-; ALL-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
-; ALL-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
-; ALL-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
-; ALL-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
-; ALL-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
-; ALL-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
-; ALL-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
-; ALL-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
-; ALL-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
-; ALL-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
-; ALL-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
-; ALL-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
-; ALL-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
-; ALL-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
-; ALL-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
-; ALL-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
-; ALL-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
-; ALL-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
-; ALL-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
-; ALL-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
-; ALL-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
-; ALL-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
-; ALL-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
-; ALL-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
-; ALL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
-; ALL-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1
-; ALL-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; ALL-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
-; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
-; ALL-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; ALL-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
-; ALL-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; ALL-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
-; ALL-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; ALL-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]]
-; ALL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
-; ALL-NEXT: br label [[LOOP]]
+; CHECK-LABEL: @Test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
+; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
+; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
+; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
+; CHECK-NEXT: br label [[LOOP]]
+;
+; FORCE_REDUCTION-LABEL: @Test(
+; FORCE_REDUCTION-NEXT: entry:
+; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]]
+; FORCE_REDUCTION: loop:
+; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
+; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> <i32 0, i32 55, i32 285, i32 1240>, [[SHUFFLE]]
+; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
+; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
+; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
+; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT: [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
+; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
+; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
+; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
+; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
+; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
+; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT: br label [[LOOP]]
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index 74e62e0e4ba..ad1434146a5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 undef>, i32 [[PARAM:%.*]], i32 1
; CHECK-NEXT: br label [[BCI_15:%.*]]
; CHECK: bci_15:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
@@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]]
; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]]
; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> <i32 16, i32 undef>, i32 [[V42]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
-; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]]
+; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
+; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: ret void