diff options
author | Mingming Liu <mingmingl@google.com> | 2022-08-03 14:20:30 -0700 |
---|---|---|
committer | Mingming Liu <mingmingl@google.com> | 2022-08-04 12:58:25 -0700 |
commit | bc8f2f36496a3fda32943f261eebb9b053e44016 (patch) | |
tree | 3f6c8454f247858e698ac1771063f84becba18d2 | |
parent | 6e193b5cbb6d7591280e4d03a658bb11f9fcd4d9 (diff) |
[AArch64][TTI][NFC] Overload method 'getVectorInstrCost' to take the vector instruction itself as context information for cost estimation.
1) The overloaded (instruction-based) method is a wrapper around the current (opcode-based) method.
2) This patch also changes a few callsites (VectorCombine.cpp,
SLPVectorizer.cpp, CodeGenPrepare.cpp) to call the overloaded method.
3) This patch is split out of D128302.
Differential Revision: https://reviews.llvm.org/D131114
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 18 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 9 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 | ||||
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 19 |
16 files changed, 67 insertions, 23 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index f613cc3fae94..13c9a2eaeefa 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1166,9 +1166,21 @@ public: /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. + /// This is used when the instruction is not available; a typical use + /// case is to provision the cost of vectorization/scalarization in + /// vectorizer passes. InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + /// \return The expected cost of vector Insert and Extract. + /// This is used when instruction is available, and implementation + /// asserts 'I' is not nullptr. + /// + /// A typical suitable use case is cost estimation when vector instruction + /// exists (e.g., from basic blocks during transformation). + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index = -1) const; + /// \return The cost of replication shuffle of \p VF elements typed \p EltTy /// \p ReplicationFactor times. 
/// @@ -1747,6 +1759,8 @@ public: const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; + virtual InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) = 0; virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, @@ -2305,6 +2319,10 @@ public: unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) override { + return Impl.getVectorInstrCost(I, Val, Index); + } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index f3376a3982a6..514a970c5999 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -575,6 +575,11 @@ public: return 1; } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) const { + return 1; + } + unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) { @@ -1148,7 +1153,7 @@ public: if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) if (CI->getValue().getActiveBits() <= 32) Idx = CI->getZExtValue(); - return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx); + return TargetTTI->getVectorInstrCost(IE, Ty, Idx); } case Instruction::ShuffleVector: { auto *Shuffle = dyn_cast<ShuffleVectorInst>(U); @@ -1238,7 +1243,7 @@ public: if (CI->getValue().getActiveBits() <= 32) Idx = CI->getZExtValue(); Type *DstTy = U->getOperand(0)->getType(); - return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx); + return TargetTTI->getVectorInstrCost(EEI, DstTy, Idx); } } // By default, just classify everything as 'basic'. 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 33c93e4c56ac..2b092d48121e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1159,6 +1159,11 @@ public: return LT.first; } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) { + return thisT()->getVectorInstrCost(I->getOpcode(), Val, Index); + } + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 4d3d549803db..c86279eae9a5 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -869,11 +869,26 @@ InstructionCost TargetTransformInfo::getCmpSelInstrCost( InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { + // FIXME: Assert that Opcode is either InsertElement or ExtractElement. + // This is mentioned in the interface description and respected by all + // callers, but never asserted upon. InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } +InstructionCost TargetTransformInfo::getVectorInstrCost(const Instruction *I, + Type *Val, + unsigned Index) const { + assert((I != nullptr) && "Expect not-null instruction pointer"); + // FIXME: Assert that Opcode is either InsertElement or ExtractElement. + // This is mentioned in the interface description and respected by all + // callers, but never asserted upon. 
+ InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, Index); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getReplicationShuffleCost( Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b100fbe2b33c..57423ffbae13 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7261,7 +7261,7 @@ class VectorPromoteHelper { // scalar to vector. // The vector chain has to account for the combining cost. InstructionCost ScalarCost = - TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); + TTI.getVectorInstrCost(Transition, PromotedType, Index); InstructionCost VectorCost = StoreExtractCombineCost; enum TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index fcd0df6f1d06..c4e7135d8b57 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -173,6 +173,7 @@ public: InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index f2260c31e678..eeb304311342 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -159,6 +159,7 @@ public: bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef<unsigned> Indices = {}) const; + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, 
unsigned Index); bool isSourceOfDivergence(const Value *V) const; diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h index 544292bc4fd9..f1a198fd14e4 100644 --- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h @@ -60,6 +60,7 @@ public: unsigned getMaxInterleaveFactor(unsigned VF); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); }; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 5e39e85bac87..a3aed48f6beb 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -237,6 +237,7 @@ public: TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 7bbaf7ae9cb2..9d263642173a 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -151,6 +151,7 @@ public: TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 790eb0b42afa..9a5b26bd1b90 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -123,6 +123,7 @@ public: CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using 
BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 33317e799eab..0804d0330de4 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -107,6 +107,7 @@ public: CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index fde58a9587b6..10179eff2620 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -67,6 +67,7 @@ public: TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index bd3c3fb1bb2f..9f83c0461b56 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -146,6 +146,7 @@ public: CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d69d1e3d19f3..737fcb386764 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5882,8 +5882,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, continue; } } - Cost -= TTIRef.getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), Idx); + Cost -= TTIRef.getVectorInstrCost(EE, EE->getVectorOperandType(), Idx); } // Add a cost for subvector extracts/inserts if required. for (const auto &Data : ExtractVectorsTys) { @@ -6116,9 +6115,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, for (unsigned I : E->ReuseShuffleIndices) { if (ShuffleOrOp == Instruction::ExtractElement) { auto *EE = cast<ExtractElementInst>(VL[I]); - CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), - *getExtractIndex(EE)); + CommonCost -= TTI->getVectorInstrCost( + EE, EE->getVectorOperandType(), *getExtractIndex(EE)); } else { CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx); @@ -6129,9 +6127,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, for (Value *V : VL) { if (ShuffleOrOp == Instruction::ExtractElement) { auto *EE = cast<ExtractElementInst>(V); - CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), - *getExtractIndex(EE)); + CommonCost += TTI->getVectorInstrCost( + EE, EE->getVectorOperandType(), *getExtractIndex(EE)); } else { --Idx; CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement, diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index a38936644bd3..37901d8bce4d 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -270,10 +270,8 @@ ExtractElementInst *VectorCombine::getShuffleExtract( Type *VecTy = Ext0->getVectorOperand()->getType(); assert(VecTy == 
Ext1->getVectorOperand()->getType() && "Need matching types"); - InstructionCost Cost0 = - TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0); - InstructionCost Cost1 = - TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1); + InstructionCost Cost0 = TTI.getVectorInstrCost(Ext0, VecTy, Index0); + InstructionCost Cost1 = TTI.getVectorInstrCost(Ext1, VecTy, Index1); // If both costs are invalid no shuffle is needed if (!Cost0.isValid() && !Cost1.isValid()) @@ -337,10 +335,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, unsigned Ext0Index = Ext0IndexC->getZExtValue(); unsigned Ext1Index = Ext1IndexC->getZExtValue(); - InstructionCost Extract0Cost = - TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index); - InstructionCost Extract1Cost = - TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index); + InstructionCost Extract0Cost = TTI.getVectorInstrCost(Ext0, VecTy, Ext0Index); + InstructionCost Extract1Cost = TTI.getVectorInstrCost(Ext1, VecTy, Ext1Index); // A more expensive extract will always be replaced by a splat shuffle. 
// For example, if Ext0 is more expensive: @@ -754,9 +750,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) { if (!VecTy) return false; - InstructionCost OldCost = - TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0); - OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1); + InstructionCost OldCost = TTI.getVectorInstrCost(Ext0, VecTy, Index0); + OldCost += TTI.getVectorInstrCost(Ext1, VecTy, Index1); OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(), CmpInst::makeCmpResultType(I0->getType()), Pred) * @@ -776,7 +771,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) { NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy, ShufMask); NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy); - NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex); + NewCost += TTI.getVectorInstrCost(Ext0, CmpTy, CheapIndex); // Aggressively form vector ops if the cost is equal because the transform // may enable further optimization. |