diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 102 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h | 33 |
2 files changed, 47 insertions, 88 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d073c0a5cc9..a0da1765ff9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6763,34 +6763,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, /// Case 0 - Possible to commute the first and second operands. /// Case 1 - Possible to commute the first and third operands. /// Case 2 - Possible to commute the second and third operands. -static int getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, - unsigned SrcOpIdx2) { +static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, + unsigned SrcOpIdx2) { // Put the lowest index to SrcOpIdx1 to simplify the checks below. if (SrcOpIdx1 > SrcOpIdx2) std::swap(SrcOpIdx1, SrcOpIdx2); unsigned Op1 = 1, Op2 = 2, Op3 = 3; if (X86II::isKMasked(TSFlags)) { - // The k-mask operand cannot be commuted. - if (SrcOpIdx1 == 2) - return -1; - - // For k-zero-masked operations it is Ok to commute the first vector - // operand. - // For regular k-masked operations a conservative choice is done as the - // elements of the first vector operand, for which the corresponding bit - // in the k-mask operand is set to 0, are copied to the result of the - // instruction. - // TODO/FIXME: The commute still may be legal if it is known that the - // k-mask operand is set to either all ones or all zeroes. - // It is also Ok to commute the 1st operand if all users of MI use only - // the elements enabled by the k-mask operand. For example, - // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i] - // : v1[i]; - // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 -> - // // Ok, to commute v1 in FMADD213PSZrk. - if (X86II::isKMergeMasked(TSFlags) && SrcOpIdx1 == Op1) - return -1; Op2++; Op3++; } @@ -6801,7 +6781,7 @@ static int getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, return 1; if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3) return 2; - return -1; + llvm_unreachable("Unknown three src commute case."); } unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( @@ -6810,23 +6790,19 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( unsigned Opc = MI.getOpcode(); - // Put the lowest index to SrcOpIdx1 to simplify the checks below. - if (SrcOpIdx1 > SrcOpIdx2) - std::swap(SrcOpIdx1, SrcOpIdx2); - // TODO: Commuting the 1st operand of FMA*_Int requires some additional // analysis. The commute optimization is legal only if all users of FMA*_Int // use only the lowest element of the FMA*_Int instruction. Such analysis are // not implemented yet. So, just return 0 in that case. // When such analysis are available this place will be the right place for // calling it. - if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1) - return 0; + assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) && + "Intrinsic instructions can't commute operand 1"); // Determine which case this commute is or if it can't be done. - int Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2); - if (Case < 0) - return 0; + unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, + SrcOpIdx2); + assert(Case < 3 && "Unexpected case number!"); // Define the FMA forms mapping array that helps to map input FMA form // to output FMA form to preserve the operation semantics after @@ -6874,12 +6850,10 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( static bool commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2) { - uint64_t TSFlags = MI.getDesc().TSFlags; - // Determine which case this commute is or if it can't be done. - int Case = getThreeSrcCommuteCase(TSFlags, SrcOpIdx1, SrcOpIdx2); - if (Case < 0) - return false; + unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, + SrcOpIdx2); + assert(Case < 3 && "Unexpected case value!"); // For each case we need to swap two pairs of bits in the final immediate. static const uint8_t SwapMasks[3][4] = { @@ -7343,27 +7317,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, } } -bool X86InstrInfo::findFMA3CommutedOpIndices( - const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const { - - if (!findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2)) - return false; - - // Check if we can adjust the opcode to preserve the semantics when - // commute the register operands. - return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2, FMA3Group) != 0; -} - -bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const { +bool +X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2, + bool IsIntrinsic) const { uint64_t TSFlags = MI.getDesc().TSFlags; unsigned FirstCommutableVecOp = 1; unsigned LastCommutableVecOp = 3; - unsigned KMaskOp = 0; + unsigned KMaskOp = -1U; if (X86II::isKMasked(TSFlags)) { + // For k-zero-masked operations it is Ok to commute the first vector + // operand. + // For regular k-masked operations a conservative choice is done as the + // elements of the first vector operand, for which the corresponding bit + // in the k-mask operand is set to 0, are copied to the result of the + // instruction. + // TODO/FIXME: The commute still may be legal if it is known that the + // k-mask operand is set to either all ones or all zeroes. + // It is also Ok to commute the 1st operand if all users of MI use only + // the elements enabled by the k-mask operand. For example, + // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i] + // : v1[i]; + // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 -> + // // Ok, to commute v1 in FMADD213PSZrk. + // The k-mask operand has index = 2 for masked and zero-masked operations. KMaskOp = 2; @@ -7373,6 +7352,10 @@ bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, FirstCommutableVecOp = 3; LastCommutableVecOp++; + } else if (IsIntrinsic) { + // Commuting the first operand of an intrinsic instruction isn't possible + // unless we can prove that only the lowest element of the result is used. + FirstCommutableVecOp = 2; } if (isMem(MI, LastCommutableVecOp)) @@ -7535,7 +7518,7 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPMADD52LUQZrkz: { unsigned CommutableOpIdx1 = 2; unsigned CommutableOpIdx2 = 3; - if (Desc.TSFlags & X86II::EVEX_K) { + if (X86II::isKMasked(Desc.TSFlags)) { // Skip the mask register. ++CommutableOpIdx1; ++CommutableOpIdx2; @@ -7554,11 +7537,12 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, const X86InstrFMA3Group *FMA3Group = X86InstrFMA3Info::getFMA3Group(MI.getOpcode()); if (FMA3Group) - return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group); + return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, + FMA3Group->isIntrinsic()); // Handled masked instructions since we need to skip over the mask input // and the preserved input. - if (Desc.TSFlags & X86II::EVEX_K) { + if (X86II::isKMasked(Desc.TSFlags)) { // First assume that the first input is the mask operand and skip past it. unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1; unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2; @@ -7571,11 +7555,11 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, // be a 3 input instruction and we want the first two non-mask inputs. // Otherwise this is a 2 input instruction with a preserved input and // mask, so we need to move the indices to skip one more input. - if (Desc.TSFlags & X86II::EVEX_Z) - --CommutableOpIdx1; - else { + if (X86II::isKMergeMasked(Desc.TSFlags)) { ++CommutableOpIdx1; ++CommutableOpIdx2; + } else { + --CommutableOpIdx1; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 8d2c592be43..98e08bbb9bf 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -314,34 +314,6 @@ public: bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; - /// Returns true if the routine could find two commutable operands - /// in the given FMA instruction \p MI. Otherwise, returns false. - /// - /// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments. - /// The output indices of the commuted operands are returned in these - /// arguments. Also, the input values of these arguments may be preset either - /// to indices of operands that must be commuted or be equal to a special - /// value 'CommuteAnyOperandIndex' which means that the corresponding - /// operand index is not set and this method is free to pick any of - /// available commutable operands. - /// The parameter \p FMA3Group keeps the reference to the group of relative - /// FMA3 opcodes including register/memory forms of 132/213/231 opcodes. - /// - /// For example, calling this method this way: - /// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex; - /// findFMA3CommutedOpIndices(MI, Idx1, Idx2, FMA3Group); - /// can be interpreted as a query asking if the operand #1 can be swapped - /// with any other available operand (e.g. operand #2, operand #3, etc.). - /// - /// The returned FMA opcode may differ from the opcode in the given MI. - /// For example, commuting the operands #1 and #3 in the following FMA - /// FMA213 #1, #2, #3 - /// results into instruction with adjusted opcode: - /// FMA231 #3, #2, #1 - bool findFMA3CommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const; - /// Returns an adjusted FMA opcode that must be used in FMA instruction that /// performs the same computations as the given \p MI but which has the /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted. @@ -664,9 +636,12 @@ private: /// findThreeSrcCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. + /// + /// If IsIntrinsic is set, operand 1 will be ignored for commuting. bool findThreeSrcCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const; + unsigned &SrcOpIdx2, + bool IsIntrinsic = false) const; }; } // namespace llvm |