summaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorJesper Antonsson <jesper.antonsson@ericsson.com>2018-10-01 14:59:25 +0000
committerJesper Antonsson <jesper.antonsson@ericsson.com>2018-10-01 14:59:25 +0000
commitab9c678f8c2fe6a192cce3f42fc02f544549742b (patch)
tree4f80f798921e5e64b0958a2aa48a65606fa14baf /llvm
parenta973e46f5ee871cde700d5061b867fb31d81e020 (diff)
[InstCombine] Handle vector compares in foldGEPIcmp(), take 2
Summary: This is a continuation of the fix for PR34627 "InstCombine assertion at vector gep/icmp folding". (I just realized bugpoint had fuzzed the original test for me, so I had fixed another trigger of the same assert in adjacent code in InstCombine.) This patch avoids optimizing an icmp (to look only at the base pointers) when the resulting icmp would have a different type. The patch adds a testcase and also cleans up and shrinks the pre-existing test for the adjacent assert trigger. Reviewers: lebedev.ri, majnemer, spatel Reviewed By: lebedev.ri Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D52494
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp3
-rw-r--r--llvm/test/Transforms/InstCombine/pr38984.ll44
2 files changed, 32 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e019d43ba30..07bd98b30ab 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -909,7 +909,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
+ Type *BaseType = GEPLHS->getOperand(0)->getType();
+ if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
diff --git a/llvm/test/Transforms/InstCombine/pr38984.ll b/llvm/test/Transforms/InstCombine/pr38984.ll
index 2da2f9fa42d..1334042d4a1 100644
--- a/llvm/test/Transforms/InstCombine/pr38984.ll
+++ b/llvm/test/Transforms/InstCombine/pr38984.ll
@@ -2,24 +2,40 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "p:16:16"
-@offsets = external dso_local global [4 x i16], align 1
+@a = external global [21 x i16], align 1
+@offsets = external global [4 x i16], align 1
-define void @PR38984() {
-; CHECK-LABEL: @PR38984(
+; The "same gep" optimization should work with vector icmp.
+define <4 x i1> @PR38984_1() {
+; CHECK-LABEL: @PR38984_1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
%1 = insertelement <4 x i16> undef, i16 %0, i32 3
- %2 = sub <4 x i16> zeroinitializer, %1
- %3 = sext <4 x i16> %2 to <4 x i32>
- %4 = getelementptr inbounds i64, i64* null, <4 x i32> %3
- %5 = ptrtoint <4 x i64*> %4 to <4 x i32>
- %6 = getelementptr inbounds i64, i64* null, <4 x i16> %2
- %7 = ptrtoint <4 x i64*> %6 to <4 x i32>
- %8 = icmp eq <4 x i32> %5, %7
- %9 = select <4 x i1> %8, <4 x i16> zeroinitializer, <4 x i16> <i16 1, i16 1, i16 1, i16 1>
- %10 = sext <4 x i16> %9 to <4 x i32>
- ret void
+ %2 = getelementptr i32, i32* null, <4 x i16> %1
+ %3 = getelementptr i32, i32* null, <4 x i16> %1
+ %4 = icmp eq <4 x i32*> %2, %3
+ ret <4 x i1> %4
+}
+
+; The "compare base pointers" optimization should not kick in for vector icmp.
+define <4 x i1> @PR38984_2() {
+; CHECK-LABEL: @PR38984_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i1> [[TMP4]]
+;
+entry:
+ %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
+ %1 = insertelement <4 x i16> undef, i16 %0, i32 3
+ %2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
+ %3 = getelementptr i16, i16* null, <4 x i16> %1
+ %4 = icmp eq <4 x i16*> %2, %3
+ ret <4 x i1> %4
}