author     Craig Topper <craig.topper@intel.com>    2019-01-15 23:36:25 +0000
committer  Craig Topper <craig.topper@intel.com>    2019-01-15 23:36:25 +0000
commit     ce5167b234b31e4aac577f313dfec5592b5a683a (patch)
tree       0ddf8e99eb5890341abb80cba19163ecc57c9869
parent     36d962d4960b6c814f747dcd9fe58011170b19d2 (diff)
[X86] Add avx512 scatter intrinsics that use a vXi1 mask instead of a scalar integer.
We're trying to use the vXi1 types in IR as much as possible. This avoids the need for bitcasts when the producer of the mask is already a vXi1 value, such as an icmp. Those bitcasts are subject to code motion and can interfere with basic-block-at-a-time isel in bad ways.
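For example, an icmp result can now feed the scatter's mask operand directly, where the old form required a bitcast to a scalar integer first. A minimal sketch (the function and value names are illustrative, not part of the patch):

define void @scatter_cmp_mask(i8* %base, <16 x i32> %idx, <16 x i32> %trigger, <16 x float> %val) {
  %mask = icmp ne <16 x i32> %trigger, zeroinitializer
  ; old form: %m = bitcast <16 x i1> %mask to i16, passed to @llvm.x86.avx512.scatter.dps.512
  call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %base, <16 x i1> %mask, <16 x i32> %idx, <16 x float> %val, i32 4)
  ret void
}
declare void @llvm.x86.avx512.mask.scatter.dps.512(i8*, <16 x i1>, <16 x i32>, <16 x float>, i32)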
Diffstat
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsX86.td                  | 119
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                |   8
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h                |  25
-rw-r--r--  llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll  | 200
4 files changed, 257 insertions(+), 95 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 33ae80073ef..b533a852415 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3569,6 +3569,7 @@ let TargetPrefix = "x86" in {
// Gather and Scatter ops
let TargetPrefix = "x86" in {
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
@@ -3701,6 +3702,7 @@ let TargetPrefix = "x86" in {
[IntrReadMem, IntrArgMemOnly]>;
// scatter
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
@@ -3861,7 +3863,7 @@ let TargetPrefix = "x86" in {
llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
}
-// AVX512 gather intrinsics that use vXi1 masks.
+// AVX512 gather/scatter intrinsics that use vXi1 masks.
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_gather_dpd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
@@ -3977,6 +3979,121 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatter_dpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+
+ def int_x86_avx512_mask_scatter_dpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
}
// AVX-512 conflict detection instruction
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12a7998adcf..1d99f097dc7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22361,9 +22361,13 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
Src.getSimpleValueType().getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ // We support two versions of the scatter intrinsics: one with a scalar mask
+ // and one with a vXi1 mask. Convert a scalar mask to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
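With this change both intrinsic flavors reach getScatterNode and select the same scatter instruction. A minimal sketch of the two equivalent calls, using signatures declared in this patch (names illustrative):

define void @both_mask_forms(i8* %p, i8 %m, <8 x i1> %mv, <8 x i32> %i, <8 x double> %v) {
  ; legacy scalar mask: converted to a v8i1 node via getMaskNode
  call void @llvm.x86.avx512.scatter.dpd.512(i8* %p, i8 %m, <8 x i32> %i, <8 x double> %v, i32 4)
  ; new vXi1 mask: already has MaskVT, so it is used as-is
  call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %p, <8 x i1> %mv, <8 x i32> %i, <8 x double> %v, i32 4)
  ret void
}
declare void @llvm.x86.avx512.scatter.dpd.512(i8*, i8, <8 x i32>, <8 x double>, i32)
declare void @llvm.x86.avx512.mask.scatter.dpd.512(i8*, <8 x i1>, <8 x i32>, <8 x double>, i32)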
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 73503a86347..151e1b9136c 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -249,6 +249,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index c2782cbcaf2..b1c66abd208 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -1,12 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s
-declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
-declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
-
-declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
-declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
-
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## %bb.0:
@@ -20,7 +14,7 @@ define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8*
%1 = bitcast i16 %mask to <16 x i1>
%x = call <16 x float> @llvm.x86.avx512.mask.gather.dps.512(<16 x float> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x float> %x, i32 4)
ret void
}
@@ -37,7 +31,7 @@ define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %b
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512(<8 x double> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x double> %x, i32 4)
ret void
}
@@ -54,7 +48,7 @@ define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %ba
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x float> %x, i32 4)
ret void
}
@@ -71,17 +65,12 @@ define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %b
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512(<8 x double> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x double> %x, i32 4)
ret void
}
;;
;; Integer Gather/Scatter
;;
-declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
-declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
-
-declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
-declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
@@ -96,7 +85,7 @@ define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %ba
%1 = bitcast i16 %mask to <16 x i1>
%x = call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512(<16 x i32> %src, i8* %base, <16 x i32> %ind, <16 x i1> %1, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpi.512(i8* %stbuf, <16 x i1> %1, <16 x i32> %ind2, <16 x i32> %x, i32 4)
ret void
}
@@ -113,7 +102,7 @@ define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base,
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512(<8 x i32> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpi.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i32> %x, i32 4)
ret void
}
@@ -130,7 +119,7 @@ define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base,
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512(<8 x i64> %src, i8* %base, <8 x i64> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpq.512(i8* %stbuf, <8 x i1> %1, <8 x i64> %ind2, <8 x i64> %x, i32 4)
ret void
}
@@ -147,7 +136,7 @@ define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base,
%1 = bitcast i8 %mask to <8 x i1>
%x = call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512(<8 x i64> %src, i8* %base, <8 x i32> %ind, <8 x i1> %1, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpq.512(i8* %stbuf, <8 x i1> %1, <8 x i32> %ind2, <8 x i64> %x, i32 4)
ret void
}
@@ -211,8 +200,9 @@ define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x double>, <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dpd.512(i8* %stbuf, <8 x i1> %1, <8 x i32>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -224,8 +214,9 @@ define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x double>, <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qpd.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -237,8 +228,9 @@ define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i1
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i16 %mask to <16 x i1>
%x = load <16 x float>, <16 x float>* %src, align 64
- call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.dps.512(i8* %stbuf, <16 x i1> %1, <16 x i32>%ind, <16 x float> %x, i32 4)
ret void
}
@@ -250,8 +242,9 @@ define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+ %1 = bitcast i8 %mask to <8 x i1>
%x = load <8 x float>, <8 x float>* %src, align 32
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> %1, <8 x i64>%ind, <8 x float> %x, i32 4)
ret void
}
@@ -268,7 +261,7 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf)
; CHECK-NEXT: retq
%x = call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> %src, i8* %base, <8 x i64> %ind, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.mask.scatter.qps.512(i8* %stbuf, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> %ind2, <8 x float> %x, i32 4)
ret void
}
@@ -584,8 +577,6 @@ define <8 x i32> @test_int_x86_avx512_mask_gather3siv8_si(<8 x i32> %x0, i8* %x1
ret <8 x i32> %res2
}
-declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
-
define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## %bb.0:
@@ -594,13 +585,13 @@ define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv2.df(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
-
define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## %bb.0:
@@ -609,13 +600,13 @@ define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <2 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <2 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## %bb.0:
@@ -625,13 +616,13 @@ define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## %bb.0:
@@ -641,13 +632,13 @@ define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## %bb.0:
@@ -656,13 +647,13 @@ define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.sf(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## %bb.0:
@@ -671,13 +662,13 @@ define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> <i1 true, i1 true>, <2 x i64> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv4.si(i8* %x0, <2 x i1> %2, <2 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## %bb.0:
@@ -687,13 +678,13 @@ define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv8.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## %bb.0:
@@ -703,13 +694,13 @@ define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> %2, <4 x i64> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scatterdiv8.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
-
define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## %bb.0:
@@ -718,13 +709,13 @@ define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv2.df(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
-
define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## %bb.0:
@@ -733,13 +724,13 @@ define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> <i1 true, i1 true>, <4 x i32> %x2, <2 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv2.di(i8* %x0, <2 x i1> %2, <4 x i32> %x2, <2 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
-
define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## %bb.0:
@@ -749,13 +740,13 @@ define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x double> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.df(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x double> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
-
define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## %bb.0:
@@ -765,13 +756,13 @@ define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i64> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.di(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i64> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
-
define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## %bb.0:
@@ -780,13 +771,13 @@ define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.sf(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
-
define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## %bb.0:
@@ -795,13 +786,13 @@ define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> %2, <4 x i32> %x2, <4 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv4.si(i8* %x0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %x2, <4 x i32> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
-
define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## %bb.0:
@@ -811,13 +802,12 @@ define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x float> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.sf(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x float> %x3, i32 4)
ret void
}
-declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
-
define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## %bb.0:
@@ -827,8 +817,9 @@ define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ %1 = bitcast i8 %x1 to <8 x i1>
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> %1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
@@ -847,10 +838,10 @@ define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> zeroinitializer, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.mask.scattersiv8.si(i8* %x0, <8 x i1> bitcast (<1 x i8> <i8 96> to <8 x i1>), <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
@@ -908,3 +899,28 @@ declare <4 x float> @llvm.x86.avx512.mask.gather3siv4.sf(<4 x float>, i8*, <4 x
declare <4 x i32> @llvm.x86.avx512.mask.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, <4 x i1>, i32)
declare <8 x float> @llvm.x86.avx512.mask.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.x86.avx512.mask.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, <8 x i1>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dps.512(i8*, <16 x i1>, <16 x i32>, <16 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpd.512(i8*, <8 x i1>, <8 x i32>, <8 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qps.512(i8*, <8 x i1>, <8 x i64>, <8 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpd.512(i8*, <8 x i1>, <8 x i64>, <8 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpi.512(i8*, <16 x i1>, <16 x i32>, <16 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatter.dpq.512(i8*, <8 x i1>, <8 x i32>, <8 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpi.512(i8*, <8 x i1>, <8 x i64>, <8 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatter.qpq.512(i8*, <8 x i1>, <8 x i64>, <8 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv2.df(i8*, <2 x i1>, <2 x i64>, <2 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv2.di(i8*, <2 x i1>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.df(i8*, <4 x i1>, <4 x i64>, <4 x double>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.di(i8*, <4 x i1>, <4 x i64>, <4 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.sf(i8*, <2 x i1>, <2 x i64>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv4.si(i8*, <2 x i1>, <2 x i64>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv8.sf(i8*, <4 x i1>, <4 x i64>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scatterdiv8.si(i8*, <4 x i1>, <4 x i64>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv2.df(i8*, <2 x i1>, <4 x i32>, <2 x double>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv2.di(i8*, <2 x i1>, <4 x i32>, <2 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.df(i8*, <4 x i1>, <4 x i32>, <4 x double>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.di(i8*, <4 x i1>, <4 x i32>, <4 x i64>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.sf(i8*, <4 x i1>, <4 x i32>, <4 x float>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv4.si(i8*, <4 x i1>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv8.sf(i8*, <8 x i1>, <8 x i32>, <8 x float>, i32)
+declare void @llvm.x86.avx512.mask.scattersiv8.si(i8*, <8 x i1>, <8 x i32>, <8 x i32>, i32)
+