diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-15 16:56:55 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-15 16:56:55 +0000 |
commit | e8ac90758e40201d7e98b79ac9ea0e42538caf70 (patch) | |
tree | ba21c755f256a6f0702ed105c0f9f42bf61b4412 | |
parent | 6b468682ccbc860737b1ec72e13541621fd84382 (diff) |
[X86] Bailout of lowerVectorShuffleAsPermuteAndUnpack for shuffle-with-zero (PR40306)
If we're shuffling with a zero vector, then we are better off not doing VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
We were already doing this for SSSE3 targets as we have PSHUFB, but it's worth doing for all targets.
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/buildvec-extract.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 15 |
3 files changed, 16 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d425c9520c2..5404c4c0846 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12035,11 +12035,10 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack( if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize)) return Unpack; - // If we have PSHUFB, and we're shuffling with a zero vector then we're - // better off not doing VECTOR_SHUFFLE(UNPCK()) as we lose track of those - // zero elements. - if (Subtarget.hasSSSE3() && (ISD::isBuildVectorAllZeros(V1.getNode()) || - ISD::isBuildVectorAllZeros(V2.getNode()))) + // If we're shuffling with a zero vector then we're better off not doing + // VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements. + if (ISD::isBuildVectorAllZeros(V1.getNode()) || + ISD::isBuildVectorAllZeros(V2.getNode())) return SDValue(); // If none of the unpack-rooted lowerings worked (or were profitable) try an diff --git a/llvm/test/CodeGen/X86/buildvec-extract.ll b/llvm/test/CodeGen/X86/buildvec-extract.ll index 267eec4bdab..b3045800c64 100644 --- a/llvm/test/CodeGen/X86/buildvec-extract.ll +++ b/llvm/test/CodeGen/X86/buildvec-extract.ll @@ -404,11 +404,10 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; SSE2-LABEL: extract1_i16_zext_insert0_i64_undef: ; SSE2: # %bb.0: +; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,0,1,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; 
SSE2-NEXT: retq ; ; SSE41-LABEL: extract1_i16_zext_insert0_i64_undef: @@ -449,12 +448,10 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; SSE2-LABEL: extract2_i16_zext_insert0_i64_undef: ; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: extract2_i16_zext_insert0_i64_undef: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 9f2a0ccf642..c5224bb4be4 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2479,11 +2479,10 @@ define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) { define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) { ; SSE2-LABEL: shuffle_v8i16_9zzzuuuu: ; SSE2: # %bb.0: +; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,0,1,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: shuffle_v8i16_9zzzuuuu: @@ -2510,12 +2509,10 @@ 
define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) { define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) { ; SSE2-LABEL: shuffle_v8i16_2zzzuuuu: ; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: shuffle_v8i16_2zzzuuuu: |