diff options
author | Craig Topper <craig.topper@intel.com> | 2019-01-13 02:59:57 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-01-13 02:59:57 +0000 |
commit | 47100f5f523424a284c2ccf3f9442179aa27d6d4 (patch) | |
tree | 19411f538568c0c1dd598bde00e0346b8dce1a29 | |
parent | 114f0172187583259678f83bd691b27302d95ae4 (diff) |
[X86] Add X86ISD::VMFPROUND to handle the masked case of VCVTPD2PSZ128 which only produces 2 result elements and zeroes the upper elements.
We can't represent this properly with vselect like we normally do, because the mask has only 2 elements (matching the v2f64 source) while the result type is v4f32. We also have to update the instruction definition to use a VK2WM mask instead of VK4WM to represent this.
Fixes another case from PR34877.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351017 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 81 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86IntrinsicsInfo.h | 10 |
5 files changed, 89 insertions, 18 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 843d2875832..e1e30ec5aae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21796,7 +21796,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // does not change the value. Set it to 0 since it can change. return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1), DAG.getIntPtrConstant(0, dl)); - case CVTPD2PS_MASK: { + case CVTPD2PS_RND_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); @@ -22058,6 +22058,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Results[] = { SetCC, Res }; return DAG.getMergeValues(Results, dl); } + case CVTPD2PS_MASK: case TRUNCATE_TO_REG: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -27217,6 +27218,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND"; case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; + case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4af6ea19834..4b4c2d94e6f 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -304,6 +304,10 @@ namespace llvm { // Vector FP round. VFPROUND, VFPROUND_RND, VFPROUNDS_RND, + // Masked version of above. Used for v2f64->v4f32. 
+ // SRC, PASSTHRU, MASK + VMFPROUND, + // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 14faaac08cc..e05669f9ef9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7969,26 +7969,53 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, - string Alias = "", X86MemOperand MemOp = _Src.MemOp> { + string Alias = "", X86MemOperand MemOp = _Src.MemOp, + RegisterClass MaskRC = _.KRCWM> { - defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _Src.RC:$src), OpcodeStr, "$src", "$src", - (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, + defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src), + (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), + (ins MaskRC:$mask, _Src.RC:$src), + OpcodeStr, "$src", "$src", + (_.VT (OpNode (_Src.VT _Src.RC:$src))), + (vselect MaskRC:$mask, + (_.VT (OpNode (_Src.VT _Src.RC:$src))), + _.RC:$src0), + vselect, "$src0 = $dst">, EVEX, Sched<[sched]>; - defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src", + defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins MemOp:$src), + (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), + (ins MaskRC:$mask, MemOp:$src), + OpcodeStr#Alias, "$src", "$src", (_.VT (OpNode (_Src.VT - (_Src.LdFrag addr:$src))))>, + (_Src.LdFrag addr:$src)))), + (vselect MaskRC:$mask, + (_.VT (OpNode (_Src.VT + (_Src.LdFrag addr:$src)))), + _.RC:$src0), + vselect, "$src0 = $dst">, EVEX, Sched<[sched.Folded]>; - defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _Src.ScalarMemOp:$src), OpcodeStr, + defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _Src.ScalarMemOp:$src), + (ins _.RC:$src0, MaskRC:$mask, 
_Src.ScalarMemOp:$src), + (ins MaskRC:$mask, _Src.ScalarMemOp:$src), + OpcodeStr, "${src}"##Broadcast, "${src}"##Broadcast, (_.VT (OpNode (_Src.VT (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) - ))>, EVEX, EVEX_B, - Sched<[sched.Folded]>; + )), + (vselect MaskRC:$mask, + (_.VT + (OpNode + (_Src.VT + (X86VBroadcast + (_Src.ScalarLdFrag addr:$src))))), + _.RC:$src0), + vselect, "$src0 = $dst">, + EVEX, EVEX_B, Sched<[sched.Folded]>; } // Coversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, @@ -8039,7 +8066,8 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, - X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128; + null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>, + EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround, sched.YMM, "{1to4}", "{y}">, EVEX_V256; @@ -8073,6 +8101,35 @@ let Predicates = [HasVLX] in { (VCVTPS2PDZ128rm addr:$src)>; def : Pat<(v4f64 (extloadv4f32 addr:$src)), (VCVTPS2PDZ256rm addr:$src)>; + + // Special patterns to allow use of X86vmfpround for masking. Instruction + // patterns have been disabled with null_frag. 
+ def : Pat<(X86vfpround (v2f64 VR128X:$src)), + (VCVTPD2PSZ128rr VR128X:$src)>; + def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; + def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; + + def : Pat<(X86vfpround (loadv2f64 addr:$src)), + (VCVTPD2PSZ128rm addr:$src)>; + def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0), + VK2WM:$mask), + (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV, + VK2WM:$mask), + (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; + + def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))), + (VCVTPD2PSZ128rmb addr:$src)>; + def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + (v4f32 VR128X:$src0), VK2WM:$mask), + (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; + def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + v4f32x_info.ImmAllZerosV, VK2WM:$mask), + (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } // Convert Signed/Unsigned Doubleword to Double diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e2746a0e992..1a79ebec620 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -143,6 +143,14 @@ def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND", SDTCisSameSizeAs<0, 2>, SDTCisVT<3, i32>]>>; +def X86vmfpround: SDNode<"X86ISD::VMFPROUND", + SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCisSameSizeAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<1, 3>]>>; + def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVT<2, i8>, SDTCisInt<0>]>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 
3e260ca42c1..3959e35581f 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -24,7 +24,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP_IMM8, CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, - CVTPD2PS, CVTPD2PS_MASK, + CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, @@ -461,10 +461,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK, - X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK, - X86ISD::VFPROUND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK, + X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK, + X86ISD::VFPROUND, X86ISD::VMFPROUND), + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK, ISD::FP_ROUND, X86ISD::VFPROUND_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), |