From 1f3d459a80db9678a3243645cc3b2fc1d6af932d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 16 Jan 2019 15:39:52 +0000 Subject: [SLP] Fix PR40310: The reduction nodes should stay scalar. Summary: Sometimes the SLP vectorizer tries to vectorize the horizontal reduction nodes during regular vectorization. This may happen inside of the loops, when there are some vectorizable PHIs. Patch fixes this by checking if the node is the reduction node and thus it must not be vectorized, it must be gathered. Reviewers: RKSimon, spatel, hfinkel, fedor.sergeev Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D56783 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +- llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll | 266 +++++++++++++++------- llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll | 18 +- 3 files changed, 188 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2e856a7e680..a07fffe9b98 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If any of the scalars is marked as a value that needs to stay scalar, then // we need to gather the scalars. + // The reduction nodes (stored in UserIgnoreList) also should stay scalar. for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (MustGather.count(VL[i])) { + if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); newTreeEntry(VL, false, UserTreeIdx); return; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 8405117090b..67717a54659 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -3,93 +3,185 @@ ; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION define void @Test(i32) { -; ALL-LABEL: @Test( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP:%.*]] -; ALL: loop: -; ALL-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; ALL-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> -; ALL-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; ALL-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] -; ALL-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef -; ALL-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] -; ALL-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] -; ALL-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] -; ALL-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] -; ALL-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef -; ALL-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] -; ALL-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] -; ALL-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] -; ALL-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef -; ALL-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] -; ALL-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] -; ALL-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] -; ALL-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] -; ALL-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] -; ALL-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef -; ALL-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef -; ALL-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] -; ALL-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] -; ALL-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] -; ALL-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] -; ALL-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] -; ALL-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] -; ALL-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] -; ALL-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] -; ALL-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] -; ALL-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] -; ALL-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] -; ALL-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] -; ALL-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef -; ALL-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] -; ALL-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] -; ALL-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] -; ALL-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef -; ALL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0 -; ALL-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1 -; ALL-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; ALL-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0 -; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1 -; ALL-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]] -; ALL-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]] -; ALL-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> -; ALL-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] -; ALL-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; ALL-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; ALL-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; ALL-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; ALL-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] -; ALL-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]] -; ALL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 -; ALL-NEXT: br label [[LOOP]] +; CHECK-LABEL: @Test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef +; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef +; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] +; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1 +; CHECK-NEXT: br label [[LOOP]] +; +; FORCE_REDUCTION-LABEL: @Test( +; FORCE_REDUCTION-NEXT: entry: +; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]] +; FORCE_REDUCTION: loop: +; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[SHUFFLE]] +; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef +; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] +; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef +; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef +; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 +; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555 +; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]] +; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> +; FORCE_REDUCTION-NEXT: [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]] +; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]] +; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529 +; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]] +; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 +; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1 +; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]] +; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]] +; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> +; FORCE_REDUCTION-NEXT: br label [[LOOP]] ; entry: br label %loop diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index 74e62e0e4ba..ad1434146a5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[PARAM:%.*]], i32 1 ; CHECK-NEXT: br label [[BCI_15:%.*]] ; CHECK: bci_15: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 @@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]] ; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]] ; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> , i32 [[V42]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> @@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]] +; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0 +; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1 ; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void -- cgit v1.2.3