-rw-r--r--gcc/avx512f_description.txt214
-rw-r--r--gcc/common/config/i386/i386-common.c87
-rw-r--r--gcc/config.gcc16
-rw-r--r--gcc/config/i386/avx512cdintrin.h185
-rw-r--r--gcc/config/i386/avx512erintrin.h352
-rw-r--r--gcc/config/i386/avx512fintrin.h15334
-rw-r--r--gcc/config/i386/avx512pfintrin.h130
-rw-r--r--gcc/config/i386/constraints.md15
-rw-r--r--gcc/config/i386/cpuid.h5
-rw-r--r--gcc/config/i386/driver-i386.c17
-rw-r--r--gcc/config/i386/i386-builtin-types.def259
-rw-r--r--gcc/config/i386/i386-c.c10
-rw-r--r--gcc/config/i386/i386-modes.def3
-rw-r--r--gcc/config/i386/i386.c2848
-rw-r--r--gcc/config/i386/i386.h159
-rw-r--r--gcc/config/i386/i386.md374
-rw-r--r--gcc/config/i386/i386.opt20
-rw-r--r--gcc/config/i386/immintrin.h10
-rw-r--r--gcc/config/i386/mmx.md4
-rw-r--r--gcc/config/i386/predicates.md65
-rw-r--r--gcc/config/i386/shaintrin.h99
-rw-r--r--gcc/config/i386/sse.md5334
-rw-r--r--gcc/config/i386/subst.md222
-rw-r--r--gcc/doc/invoke.texi23
-rw-r--r--gcc/doc/rtl.texi4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-1.c196
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-check.h46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-2.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-2.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-check.h46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-2.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-1.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-2.c7
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-check.h47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-dummy.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-1.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-2.c7
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gather-1.c217
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gather-2.c11
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gather-3.c169
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gather-4.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-gather-5.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-helper.h96
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-inline-asm.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kandw-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-klogic-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-knotw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kortestw-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kortestw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-korw-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kxnorw-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-kxorw-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-mask-type.h8
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-os-support.h10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-rounding.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-1.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-3.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-4.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-5.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16si-1.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16si-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16si-3.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16si-4.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v16si-5.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8df-1.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8df-2.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8df-3.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8df-4.c87
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8df-5.c87
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8di-1.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8di-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8di-3.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8di-4.c87
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-set-v8di-5.c87
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-setzero-pd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-setzero-ps-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-setzero-si512-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-valignd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-valignd-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-valignq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-valignq-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vblendmps-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vblendmps-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c73
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-2.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpss-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpss-2.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcomisd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcomiss-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcompressps-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcompressps-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-2.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-2.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-2.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2sd64-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss64-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-2.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-2.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vec-init.c140
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vec-unpack.c127
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vexpandps-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vexpandps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-2.c65
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c113
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c118
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c119
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-2.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-2.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-2.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-2.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-2.c110
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-2.c110
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c100
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c104
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-2.c65
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-2.c65
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovapd-2.c69
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovaps-2.c69
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovddup-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovddup-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-2.c78
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-2.c69
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-2.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-2.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntps-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovntps-2.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c87
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c91
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovupd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovupd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovups-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmovups-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpabsd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpabsd512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpabsq-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpabsq512-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpaddd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpaddd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpaddq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpaddq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-2.c70
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c70
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilps-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-2.c80
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermpd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminsd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminsd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminsq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminsq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminud-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminud-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminuq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpminuq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmulld-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmulld-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpord-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vporq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprold-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprold-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolvd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolvd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolvq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprolvq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprord-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprord-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorvd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorvd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorvq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vprorvq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpshufd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpshufd-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpslld-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpslld-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpslldi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpslldi-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllq-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrad-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrad-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsradi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsradi-2.c76
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsraq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsraq-2.c63
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-2.c78
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravd-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravq-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrld-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrld-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-2.c75
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-2.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsubd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsubd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsubq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpsubq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-2.c69
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-2.c71
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestmd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestmd-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestmq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestmq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-2.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxord-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxorq-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-1.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-2.c94
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-1.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c92
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c63
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c65
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefps-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c66
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c66
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c66
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c66
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufpd-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufps-2.c72
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c42
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubpd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubps-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vucomisd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vucomiss-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f_cond_move.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-1.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/i386.exp54
-rw-r--r--gcc/testsuite/gcc.target/i386/m128-check.h23
-rw-r--r--gcc/testsuite/gcc.target/i386/m512-check.h73
-rw-r--r--gcc/testsuite/gcc.target/i386/sha-check.h37
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1msg1-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1msg1-2.c42
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1msg2-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1msg2-2.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1nexte-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1nexte-2.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1rnds4-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha1rnds4-2.c93
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256msg1-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256msg1-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256msg2-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256msg2-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256rnds2-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/sha256rnds2-2.c85
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c202
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c514
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c521
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c202
-rw-r--r--gcc/testsuite/gcc.target/i386/testimm-10.c200
-rw-r--r--gcc/testsuite/gcc.target/i386/testround-1.c653
-rw-r--r--gcc/testsuite/gcc.target/i386/testround-2.c57
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/abi-avx512f.exp61
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/args.h184
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S98
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/avx512f-check.h41
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_m512_returning.c32
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_m512.c168
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_structs.c64
-rw-r--r--gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_unions.c176
-rw-r--r--gcc/testsuite/lib/target-supports.exp13
-rw-r--r--gcc/tree-vect-stmts.c34
-rw-r--r--gcc/tree-vectorizer.h4
768 files changed, 55752 insertions, 951 deletions
diff --git a/gcc/avx512f_description.txt b/gcc/avx512f_description.txt
new file mode 100644
index 00000000000..40c423668fa
--- /dev/null
+++ b/gcc/avx512f_description.txt
@@ -0,0 +1,214 @@
+PART 1a & 1b - Removed.
+
+================================================================================
+PART 2. Adjust register classes.
+
+This patch adds command-line options for AVX512F use and relevant CPUID bit
+detection. Vector registers are now 512 bits wide, so support for new modes
+(e.g. V16SF) is added. AVX512F introduces 16 new registers, zmm16-zmm31. Some
+instructions now have an EVEX encoding and can use those new registers, while
+old instructions can't. We introduce a new register class for them. We also
+add a new constraint "v" which allows zmm0-zmm31. We can't extend the "x"
+constraint because it's exposed in inline asm, so doing that could break some
+inline asm if we assigned e.g. xmm21 to a non-EVEX-encodable instruction. The
+idea is to replace all uses of "x" with "v" for EVEX-encodable instructions,
+and to allow only scalar and 512-bit modes for registers 16+ in
+ix86_hard_regno_mode_ok. We update move instructions to use EVEX-encodable
+versions for AVX512F to allow use of the new registers. The main problem is
+with the vector mov<mode>_internal in sse.md. AVX512F has some instructions
+reading/writing e.g. ymm16+ (for example vinsertf64x4/vextractf64x4), but
+there is no ymm mov instruction with an EVEX encoding, so we have to use
+insert/extract instead.
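+
+As an illustrative sketch (not a pattern from the patch itself - the pattern
+name and predicates here are made up), an EVEX-encodable arithmetic pattern
+simply uses "v" where the old one used "x", making zmm16-zmm31 available to
+the register allocator:
+
+(define_insn "*avx512f_example_addv16sf3"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+        (plus:V16SF
+          (match_operand:V16SF 1 "nonimmediate_operand" "%v")
+          (match_operand:V16SF 2 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vaddps\t{%2, %1, %0|%0, %1, %2}")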
+
+
+================================================================================
+PART 3. Add mask registers.
+
+In this patch we add support for the new mask registers k0-k7. Changes are
+mostly straightforward, but there are two problems. First, we can't use k0 as
+a mask in vector instructions, so we have to introduce two register classes.
+One for use in vector instructions with the "k" constraint - corresponding to
+k1-k7. And one for instructions like kxor, which can use all the new mask
+registers. Another problem is that we only have the 16-bit kmovw, while masks
+are used at both 8- and 16-bit widths. So we don't have memory alternatives
+in movqi_internal, and hope that the register allocator will figure out that
+the move needs to go through a GPR.
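+
+A minimal sketch of a mask-logic insn that may use the whole k0-k7 class (the
+pattern name and the exact constraint letter chosen for that class are
+illustrative here, not taken from the patch):
+
+(define_insn "*example_kxorhi"
+  [(set (match_operand:HI 0 "register_operand" "=k")
+        (xor:HI (match_operand:HI 1 "register_operand" "k")
+                (match_operand:HI 2 "register_operand" "k")))]
+  "TARGET_AVX512F"
+  "kxorw\t{%2, %1, %0|%0, %1, %2}")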
+
+
+================================================================================
+PART 4. AVX512F patterns patch
+
+This patch contains pattern changes for the AVX512F ISA. Support for some
+AVX512F instructions is added by extending existing SSE/AVX/AVX2 patterns,
+and there are also some new patterns. Extending existing patterns involves
+supporting code changes and/or iterator extensions. In our implementation, we
+tried to avoid unneeded iterators; instead we tried to extend existing ones
+where possible. Note that if some general iterator like VF isn't extended to
+512 bits, it means it is only used in patterns that have no 512-bit widening.
+
+Changes in i386.md add some code attrs and iterators we used to merge patterns.
+Maybe later we should move them to sse.md.
+
+Changes in i386.c are related to pattern merging too. Changes in
+ix86_expand_vector_move_misalign are analogous to the
+(TARGET_AVX && GET_MODE_SIZE (mode) == 32) branch; there's just no separate
+function for the MODE_VECTOR_FLOAT case.
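+
+For illustration, extending an existing mode iterator rather than adding a
+new one looks like the following (a sketch - the exact entries each iterator
+gained are as in the patch, not necessarily as shown here):
+
+(define_mode_iterator VF
+  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])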
+
+
+================================================================================
+PART 5. Description for substs.
+
+MASKING
+
+For each insn we wanted to add its original variant extended to 512 bits, its
+masked variant, an unmasked variant with rounding and a masked variant with
+rounding. Later we'd probably add variants with and without embedded
+broadcasting. To do that, we introduced define_subst and used it in the
+following way. Most insn patterns are something like
+(set
+ (match_operand /*dest*/ )
+ (match_operand /*operation*/)),
+so we made the following subst for them:
+(define_subst "mask"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))])
+
+But that subst wasn't enough to cover all patterns for instructions with
+masking: we needed three more substs. Those are:
+* mask_scalar,
+* mask_scalar_merge,
+* sd
+
+mask_scalar is used for scalar instructions, in which we need to merge not
+with the destination but with another source operand. An example of such an
+instruction is vsqrtss. We can't use the usual mask subst here, as we first
+need to do the vec_merge for masking, and only then take the lowest element
+with the second vec_merge - so the order of vec_merges matters here. So, we
+specify the pattern in more detail for this subst:
+(set
+ (match_operand /*dest*/)
+ (vec_merge
+ (match_operand /*operation*/)
+ (match_operand /*src operand from which we take upper bits*/)
+ (const_int 1)))
+We want to add our vec_merge (for masking) on top of the operation, not on
+the existing vec_merge.
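+
+A hedged sketch of what such a subst could look like, following the shape
+above (operand numbering and predicates are our assumption, not copied from
+the patch):
+
+(define_subst "mask_scalar"
+  [(set (match_operand:SUBST_V 0)
+        (vec_merge:SUBST_V
+          (match_operand:SUBST_V 1)
+          (match_operand:SUBST_V 2)
+          (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+        (vec_merge:SUBST_V
+          (vec_merge:SUBST_V
+            (match_dup 1)
+            (match_operand:SUBST_V 3 "vector_move_operand" "0C")
+            (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
+          (match_dup 2)
+          (const_int 1)))])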
+
+The next subst is mask_scalar_merge; it is used for the new cmp instructions,
+which compare vectors and store the result in a mask. If the operation is
+itself masked, we need to AND the result mask of the comparison with the
+input mask - and that's what the subst does:
+(define_subst "mask_scalar_merge"
+ [(set (match_operand:SUBST_S 0)
+ (match_operand:SUBST_S 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (and:SUBST_S
+ (match_dup 1)
+ (match_operand:SUBST_S 3 "register_operand" "k")))])
+
+The last subst is called sd (Source-Destination). It is almost the same as
+the usual mask subst, but it's only used for zero-masking. The reason is that
+some patterns already have an operand with constraint "0" and we can't add a
+new operand with the same constraint. So we add only zero-masking here by
+subst, and manually write a pattern for merge-masking where we use match_dup
+instead of an operand with constraint "0":
+(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=T,T")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,T")
+ (match_operand:VF_512 3 "nonimmediate_operand" "T,<round_constraint>"))
+ (match_dup 1) <<<<<------ Operand for merge
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmadd132
+ ..."
+ [(...)])
+Examples of such instructions are the FMA insns and some permutes.
+
+We also added a set of subst attributes. They are used to:
+1) modify the name of an insn pattern:
+(define_subst_attr "mask_name" "mask" "" "_mask")
+2) properly add masking operands:
+(define_subst_attr "mask_operand3" "mask" "" "%M4%N3")
+3) adjust operands' constraints:
+(define_subst_attr "store_mask_constraint" "mask" "Tm" "T")
+4) hide the unmasked version of a pattern with '*':
+(define_subst_attr "mask_codefor" "mask" "*" "")
+
+It's not possible to share subst attributes across different substs, so we
+created such a set of subst attributes for each subst.
+
+ROUNDING
+Rounding is implemented by adding a const_int value in parallel with the
+original pattern - however, that might not be the best approach. The
+define_substs for this transformation are quite straightforward - we needed
+three of them to cover all the patterns we need to transform. The most
+interesting part here is how to determine which operand corresponds to the
+rounding immediate. To reflect this in the pattern, we allowed using
+attributes inside other attributes, i.e. attribute nesting (that's already in
+the trunk). The problem arises from the following: when the subst for
+rounding is applied to a pattern after the subst for masking, it's actually
+applied to two patterns with different numbers of operands. To find that
+number, we wrote the following attributes:
+(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
+(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>")
+As a result, we get either an empty string (if the rounding subst isn't
+applied), or "%R2" or "%R4" depending on whether the mask subst was applied.
+
+There are other similar attributes for the different combinations of round
+and mask substs.
+
+
+================================================================================
+PART 7. Add builtins.
+
+New builtins were added to the bdesc_args and bdesc_round_args tables. For
+special cases we created new expanders: ix86_expand_round_builtin and
+ix86_erase_embedded_rounding for instructions with rounding, and
+ix86_expand_sse_comi_round for comi instructions. In
+ix86_expand_special_args_builtin we added support for the kortest instruction,
+and for the new GATHERs in the subroutine gather_gen. A scatter_gen subroutine
+for the SCATTERs was created analogously to gather_gen.
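+
+For reference, an entry in these tables has the following shape (one
+illustrative line; the exact entries are in i386.c):
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask,
+    "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512,
+    UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },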
+
+
+================================================================================
+PART 8. Testsuite approach description
+
+While implementing the testsuite, we kept to the rule that we do not want more
+than two test files per instruction - a scan-assembler test and a runtime
+test.
+
+Consider that in the general case, for most new instructions, we have a simple
+intrinsic, an intrinsic with merge masking and an intrinsic with zero masking -
+and we need scan tests and runtime tests for them all. There may also be
+rounding support, i.e. an intrinsic with rounding. For that case we only have
+scan tests and no runtime tests, because it is unclear how to implement a
+runtime test for rounding.
+
+First, the scan tests (avx512f-<insn>-1.c). Each test should aggregate all
+intrinsics that generate the corresponding instruction <insn>, i.e. the simple
+intrinsic, the merge-masking, zero-masking and rounding intrinsics, and maybe
+some aliases that are worth testing. The tests are written in exactly the same
+manner as the AVX2 scan tests; see avx2-*-1.c for reference.
+
+Second, the runtime tests (avx512f-<insn>-2.c). Basically, the approach is the
+same as for the AVX2 runtime tests - call an intrinsic with some
+pre-initialized source and destination and check whether the results meet
+expectations - except that we have 3-4 intrinsics with the same semantics. To
+avoid lots of duplicated code, we use macros in the runtime tests. The macros
+are defined in avx512f-helper.h, and every runtime test includes this file.
+avx512f-helper.h also contains the definition of the core testing function,
+avx512f_test. Note that some macros are defined in dg-options. This machinery
+may seem redundant for now, but it will be extremely useful for future
+extensions. There are also some stand-alone AVX512F runtime tests that are
+implemented without our macro machinery, just like the AVX2 tests.
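+
+To make this concrete, a minimal stand-alone runtime test - a sketch following
+the conventions above, using only intrinsics from avx512fintrin.h - could look
+like:
+
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include <stdlib.h>
+#include "avx512f-check.h"
+
+static void
+avx512f_test (void)
+{
+  int r[16] __attribute__ ((aligned (64)));
+  __m512i x = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
+                                7, 6, 5, 4, 3, 2, 1, 0);
+  int i;
+
+  /* Element i of x is i; doubling it must give 2*i everywhere.  */
+  _mm512_store_epi32 (r, _mm512_add_epi32 (x, x));
+  for (i = 0; i < 16; i++)
+    if (r[i] != 2 * i)
+      abort ();
+}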
+
+Finally, we have updated the avx-1.c, sse-*.c and testimm-*.c tests with the
+new intrinsics and builtins. To check the error messages for intrinsics with
+rounding, we have added the testround-*.c tests.
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index b73e369bb32..12aa32460ad 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -57,6 +57,14 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_XSAVE_SET OPTION_MASK_ISA_XSAVE
#define OPTION_MASK_ISA_XSAVEOPT_SET \
(OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_XSAVE)
+#define OPTION_MASK_ISA_AVX512F_SET \
+ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX2_SET)
+#define OPTION_MASK_ISA_AVX512CD_SET \
+ (OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512PF_SET \
+ (OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512ER_SET \
+ (OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@@ -76,9 +84,11 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_LWP_SET \
OPTION_MASK_ISA_LWP
-/* AES and PCLMUL need SSE2 because they use xmm registers */
+/* AES, SHA and PCLMUL need SSE2 because they use xmm registers. */
#define OPTION_MASK_ISA_AES_SET \
(OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SHA_SET \
+ (OPTION_MASK_ISA_SHA | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
(OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
@@ -128,11 +138,18 @@ along with GCC; see the file COPYING3. If not see
| OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
| OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_XSAVE_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
-#define OPTION_MASK_ISA_AVX2_UNSET OPTION_MASK_ISA_AVX2
#define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
#define OPTION_MASK_ISA_XSAVE_UNSET \
(OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET)
#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
+#define OPTION_MASK_ISA_AVX2_UNSET \
+ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
+#define OPTION_MASK_ISA_AVX512F_UNSET \
+ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
+ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET)
+#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
+#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
+#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@@ -151,6 +168,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
+#define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
@@ -313,6 +331,58 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavx512f:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512F_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_UNSET;
+ }
+ return true;
+
+ case OPT_mavx512cd:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512CD_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_UNSET;
+ }
+ return true;
+
+ case OPT_mavx512pf:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512PF_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512PF_UNSET;
+ }
+ return true;
+
+ case OPT_mavx512er:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512ER_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512ER_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -544,6 +614,19 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_msha:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SHA_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SHA_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SHA_UNSET;
+ }
+ return true;
+
case OPT_mpclmul:
if (value)
{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 177677c8b15..e8b6d0a232e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -367,9 +367,11 @@ i[34567]86-*-*)
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
- avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h
- xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h
- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h"
+ avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h
+ rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h
+ adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
+ avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
+ shaintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -382,9 +384,11 @@ x86_64-*-*)
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
- avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h
- xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h
- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h"
+ avx2intrin.h avx512fintrin.h fmaintrin.h f16cintrin.h
+ rtmintrin.h xtestintrin.h rdseedintrin.h prfchwintrin.h
+ adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
+ avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
+ shaintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h
new file mode 100644
index 00000000000..42503281bd4
--- /dev/null
+++ b/gcc/config/i386/avx512cdintrin.h
@@ -0,0 +1,185 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512CDINTRIN_H_INCLUDED
+#define _AVX512CDINTRIN_H_INCLUDED
+
+#ifndef __AVX512CD__
+#pragma GCC push_options
+#pragma GCC target("avx512cd")
+#define __DISABLE_AVX512CD__
+#endif /* __AVX512CD__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
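+/* VPCONFLICTD/VPCONFLICTQ semantics: for each element, the result holds one
+   bit per preceding element, set when that element equals the current one.  */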
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conflict_epi32 (__m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conflict_epi64 (__m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+ (__v8di) _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+ (__v8di) _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_lzcnt_epi64 (__m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+ (__v8di) _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+ (__v8di) _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_lzcnt_epi32 (__m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i)
+ __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m512i) __builtin_ia32_broadcastmb512 (__A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m512i) __builtin_ia32_broadcastmw512 (__A);
+}
+
+#ifdef __DISABLE_AVX512CD__
+#undef __DISABLE_AVX512CD__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512CD__ */
+
+#endif /* _AVX512CDINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
new file mode 100644
index 00000000000..1722f1eac46
--- /dev/null
+++ b/gcc/config/i386/avx512erintrin.h
@@ -0,0 +1,352 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512ERINTRIN_H_INCLUDED
+#define _AVX512ERINTRIN_H_INCLUDED
+
+#ifndef __AVX512ER__
+#pragma GCC push_options
+#pragma GCC target("avx512er")
+#define __DISABLE_AVX512ER__
+#endif /* __AVX512ER__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef float __v16sf __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m512 __attribute__ ((__vector_size__ (64),
+ __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64),
+ __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
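+/* The rounding argument of these builtins must be a compile-time constant.
+   When optimizing, the always_inline wrappers below guarantee that; without
+   optimization they might not be inlined, so the macro forms in the #else
+   branch are used instead.  */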
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_exp2a23_round_pd (__m512d __A, int __R)
+{
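+  /* __W is left uninitialized: with the all-ones mask passed below, no
+     destination element is taken from it.  */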
+ __m512d __W;
+ return (__m512d)__builtin_ia32_exp2pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)-1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_exp2pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)__U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_exp2pd_mask ((__v8df)__A,
+ (__v8df)_mm512_setzero_pd(),
+ (__mmask8)__U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_exp2a23_round_ps (__m512 __A, int __R)
+{
+ __m512 __W;
+ return (__m512)__builtin_ia32_exp2ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)-1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_exp2ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)__U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_exp2ps_mask ((__v16sf)__A,
+ (__v16sf)_mm512_setzero_ps(),
+ (__mmask16)__U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp28_round_pd (__m512d __A, int __R)
+{
+ __m512d __W;
+ return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)-1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)__U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
+ (__v8df)_mm512_setzero_pd(),
+ (__mmask8)__U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp28_round_ps (__m512 __A, int __R)
+{
+ __m512 __W;
+ return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)-1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)__U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
+ (__v16sf)_mm512_setzero_ps(),
+ (__mmask16)__U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt28_round_pd (__m512d __A, int __R)
+{
+ __m512d __W;
+ return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)-1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
+ (__v8df)__W,
+ (__mmask8)__U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
+{
+ return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
+ (__v8df)_mm512_setzero_pd(),
+ (__mmask8)__U,
+ __R);
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt28_round_ps (__m512 __A, int __R)
+{
+ __m512 __W;
+ return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)-1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
+ (__v16sf)__W,
+ (__mmask16)__U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
+{
+ return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
+ (__v16sf)_mm512_setzero_ps(),
+ (__mmask16)__U,
+ __R);
+}
+#else
+#define _mm512_exp2a23_round_pd(A, C) \
+ __builtin_ia32_exp2pd_mask((A), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
+ __builtin_ia32_exp2pd_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
+ __builtin_ia32_exp2pd_mask((A), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_exp2a23_round_ps(A, C) \
+ __builtin_ia32_exp2ps_mask((A), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
+ __builtin_ia32_exp2ps_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
+ __builtin_ia32_exp2ps_mask((A), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm512_rcp28_round_pd(A, C) \
+ __builtin_ia32_rcp28pd_mask((A), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
+ __builtin_ia32_rcp28pd_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_rcp28_round_pd(U, A, C) \
+ __builtin_ia32_rcp28pd_mask((A), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_rcp28_round_ps(A, C) \
+ __builtin_ia32_rcp28ps_mask((A), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
+ __builtin_ia32_rcp28ps_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_rcp28_round_ps(U, A, C) \
+ __builtin_ia32_rcp28ps_mask((A), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm512_rsqrt28_round_pd(A, C) \
+ __builtin_ia32_rsqrt28pd_mask((A), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
+ __builtin_ia32_rsqrt28pd_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
+ __builtin_ia32_rsqrt28pd_mask((A), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_rsqrt28_round_ps(A, C) \
+ __builtin_ia32_rsqrt28ps_mask((A), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
+ __builtin_ia32_rsqrt28ps_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
+ __builtin_ia32_rsqrt28ps_mask((A), (__v16sf)_mm512_setzero_ps(), (U), (C))
+#endif
+
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(W, U, A) \
+ _mm512_mask_exp2a23_round_pd((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(U, A) \
+ _mm512_maskz_exp2a23_round_pd((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(W, U, A) \
+ _mm512_mask_exp2a23_round_ps((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(U, A) \
+ _mm512_maskz_exp2a23_round_ps((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(W, U, A) \
+ _mm512_mask_rcp28_round_pd((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(U, A) \
+ _mm512_maskz_rcp28_round_pd((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_ps(A) \
+ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_ps(W, U, A) \
+ _mm512_mask_rcp28_round_ps((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_ps(U, A) \
+ _mm512_maskz_rcp28_round_ps((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(W, U, A) \
+ _mm512_mask_rsqrt28_round_pd((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(U, A) \
+ _mm512_maskz_rsqrt28_round_pd((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(W, U, A) \
+ _mm512_mask_rsqrt28_round_ps((W), (U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(U, A) \
+ _mm512_maskz_rsqrt28_round_ps((U), (A), _MM_FROUND_CUR_DIRECTION)
+
+#ifdef __DISABLE_AVX512ER__
+#undef __DISABLE_AVX512ER__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512ER__ */
+
+#endif /* _AVX512ERINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
new file mode 100644
index 00000000000..be681ce64da
--- /dev/null
+++ b/gcc/config/i386/avx512fintrin.h
@@ -0,0 +1,15334 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512FINTRIN_H_INCLUDED
+#define _AVX512FINTRIN_H_INCLUDED
+
+#ifndef __AVX512F__
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#define __DISABLE_AVX512F__
+#endif /* __AVX512F__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef float __v16sf __attribute__ ((__vector_size__ (64)));
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m512 __attribute__ ((__vector_size__ (64),
+ __may_alias__));
+typedef long long __m512i __attribute__ ((__vector_size__ (64),
+ __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64),
+ __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+/* Rounding mode macros. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+#define _MM_FROUND_NO_EXC 0x05
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_epi64 (long long __A, long long __B, long long __C,
+ long long __D, long long __E, long long __F,
+ long long __G, long long __H)
+{
+  return __extension__ (__m512i) (__v8di) { __H, __G, __F, __E,
+                                            __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H I J K L M N O P]. */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H,
+ int __I, int __J, int __K, int __L,
+ int __M, int __N, int __O, int __P)
+{
+ return __extension__ (__m512i)(__v16si){ __P, __O, __N, __M,
+ __L, __K, __J, __I,
+ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_pd (double __A, double __B, double __C, double __D,
+ double __E, double __F, double __G, double __H)
+{
+ return __extension__ (__m512d){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H,
+ float __I, float __J, float __K, float __L,
+ float __M, float __N, float __O, float __P)
+{
+  return __extension__ (__m512) { __P, __O, __N, __M,
+                                  __L, __K, __J, __I,
+                                  __H, __G, __F, __E,
+                                  __D, __C, __B, __A };
+}
+
+#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10, \
+ e11,e12,e13,e14,e15) \
+ _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9), \
+ (e8),(e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11, \
+ e12,e13,e14,e15) \
+ _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8), \
+ (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_ps (void)
+{
+ return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_pd (void)
+{
+ return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_si512 (void)
+{
+ return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_pd (void const *__P)
+{
+ return *(__m512d *) __P;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_pd (void *__P, __m512d __A)
+{
+ *(__m512d *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+ __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_ps (void const *__P)
+{
+ return *(__m512 *) __P;
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_ps (void *__P, __m512 __A)
+{
+ *(__m512 *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+ __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_epi64 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *)__P,
+ (__v8di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *)__P,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_epi64 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+ __builtin_ia32_movdqa64store512_mask ((__v8di *)__P, (__v8di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_si512 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_epi32 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *)__P,
+ (__v16si) __W,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *)__P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_si512 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_epi32 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+ __builtin_ia32_movdqa32store512_mask ((__v16si *)__P, (__v16si) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sd (__mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ss (__mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_epu32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi64 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_slli_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_slli_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_slli_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi64 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
+ __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_srli_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_srli_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_srli_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi64 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_srai_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_srai_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_srai_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
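+
+/* srai/sra are arithmetic shifts: the vacated bits are copies of the
+   sign bit.  64-bit arithmetic right shifts (vpsraq) are new with
+   AVX-512F.  Sketch:
+
+     __m512i v = _mm512_set1_epi64 (-16);
+     __m512i r = _mm512_srai_epi64 (v, 2);              -> all lanes -4  */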
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi32 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+#else
+#define _mm512_slli_epi32(X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_slli_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_slli_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi32 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+#else
+#define _mm512_srli_epi32(X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_srli_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_srli_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi32 (__m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
+{
+ return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+#else
+#define _mm512_srai_epi32(X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_srai_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_srai_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+#endif
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm_add_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_addsd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_add_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_addsd_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_add_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_addsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_add_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_addss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_add_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_addss_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_add_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_addss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+
+#define _mm_sub_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_subsd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_sub_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_subsd_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_sub_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_subsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_sub_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_subss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_sub_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_subss_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_sub_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_subss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+#endif
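+
+/* The __R argument of the *_round_* intrinsics must be a compile-time
+   _MM_FROUND_* constant selecting the embedded rounding mode and the
+   exception behaviour; _MM_FROUND_CUR_DIRECTION keeps the current
+   MXCSR mode.  Sketch, for suitably declared a and b:
+
+     __m128d r = _mm_add_round_sd (a, b, _MM_FROUND_TO_NEAREST_INT
+                                         | _MM_FROUND_NO_EXC);  */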
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __C, __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U,
+ __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __C, __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A,
+ __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U,
+ __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A,
+ __m512i __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U);
+}
+#else
+#define _mm512_ternarylogic_epi64(A, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
+#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
+#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
+#define _mm512_ternarylogic_epi32(A, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+ (__mmask16)-1))
+#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+ (__mmask16)(U)))
+#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
+ ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
+ (__mmask16)(U)))
+#endif
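+
+/* The 8-bit immediate of vpternlog{d,q} is a truth table indexed by
+   one bit from each operand: bit (A<<2 | B<<1 | C) of the immediate
+   gives the result bit.  For example 0x96 is a three-way XOR and 0xca
+   is a bitwise select (A ? B : C):
+
+     __m512i x = _mm512_ternarylogic_epi32 (a, b, c, 0x96);  -> a^b^c  */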
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp14_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp14_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
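+
+/* rcp14 approximates the reciprocal with a maximum relative error of
+   2^-14.  Where more accuracy is needed, one Newton-Raphson step
+   (x1 = x0 * (2 - a * x0)) refines it to nearly full precision.
+   Sketch:
+
+     __m512 x0 = _mm512_rcp14_ps (a);
+     __m512 x1 = _mm512_sub_ps (_mm512_add_ps (x0, x0),
+                                _mm512_mul_ps (a, _mm512_mul_ps (x0, x0)));  */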
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt14_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt14_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
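+
+/* rsqrt14 likewise approximates 1/sqrt(x) to within a relative error
+   of 2^-14; the mask and maskz variants follow the usual merge/zero
+   pattern.  Sketch:
+
+     __m512 r = _mm512_rsqrt14_ps (a);          -> ~1/sqrt(a) per lane  */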
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_pd (__m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_ps (__m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm512_sqrt_round_pd(A, C) \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((A), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_sqrt_round_pd(U, A, C) \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((A), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_sqrt_round_ps(A, C) \
+ (__m512)__builtin_ia32_sqrtps512_mask((A), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
+ (__m512)__builtin_ia32_sqrtps512_mask((A), (W), (U), (C))
+
+#define _mm512_maskz_sqrt_round_ps(U, A, C) \
+ (__m512)__builtin_ia32_sqrtps512_mask((A), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm_sqrt_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask((B), (A), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask((B), (A), (W), (U), (C))
+
+#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask((B), (A), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_sqrt_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask((B), (A), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask((B), (A), (W), (U), (C))
+
+#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask((B), (A), (__v4sf)_mm_setzero_ps(), (U), (C))
+#endif
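+
+/* For the scalar sqrt forms the element being square-rooted comes from
+   __B and the preserved upper element from __A, hence the swapped
+   builtin operands above (matching vsqrtsd/vsqrtss).  Sketch:
+
+     __m128d r = _mm_sqrt_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
+
+   leaves sqrt of b[0] in r[0] and a[1] in r[1].  */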
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi32 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi32 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi64 (__m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi32 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu16_epi32 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu16_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_epi64 (__m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
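+
+/* The cvtepi* intrinsics sign-extend and the cvtepu* intrinsics
+   zero-extend the low lanes of the narrow source into 512 bits.
+   Sketch:
+
+     __m128i b = _mm_set1_epi8 (-1);
+     __m512i s = _mm512_cvtepi8_epi32 (b);      -> sixteen lanes of -1
+     __m512i u = _mm512_cvtepu8_epi32 (b);      -> sixteen lanes of 255  */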
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+#else
+#define _mm512_add_round_pd(A, B, C) \
+ (__m512d)__builtin_ia32_addpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_add_round_pd(W, U, A, B, C) \
+ (__m512d)__builtin_ia32_addpd512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_add_round_pd(U, A, B, C) \
+ (__m512d)__builtin_ia32_addpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_add_round_ps(A, B, C) \
+ (__m512)__builtin_ia32_addps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_add_round_ps(W, U, A, B, C) \
+ (__m512)__builtin_ia32_addps512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_add_round_ps(U, A, B, C) \
+ (__m512)__builtin_ia32_addps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm512_sub_round_pd(A, B, C) \
+ (__m512d)__builtin_ia32_subpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
+ (__m512d)__builtin_ia32_subpd512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_sub_round_pd(U, A, B, C) \
+ (__m512d)__builtin_ia32_subpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_sub_round_ps(A, B, C) \
+ (__m512)__builtin_ia32_subps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
+ (__m512)__builtin_ia32_subps512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_sub_round_ps(U, A, B, C) \
+ (__m512)__builtin_ia32_subps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C))
+#endif
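+
+/* The packed round forms take the same _MM_FROUND_* selector; with
+   _MM_FROUND_TO_ZERO the sum below is truncated toward zero instead
+   of rounded to nearest.  Sketch:
+
+     __m512d r = _mm512_add_round_pd (a, b, _MM_FROUND_TO_ZERO
+                                            | _MM_FROUND_NO_EXC);  */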
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
+ __m512d __V, const int __R)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm512_mul_round_pd(A, B, C) \
+ (__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
+ (__m512d)__builtin_ia32_mulpd512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_mul_round_pd(U, A, B, C) \
+ (__m512d)__builtin_ia32_mulpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_mul_round_ps(A, B, C) \
+ (__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
+ (__m512)__builtin_ia32_mulps512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_mul_round_ps(U, A, B, C) \
+ (__m512)__builtin_ia32_mulps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm512_div_round_pd(A, B, C) \
+ (__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_div_round_pd(W, U, A, B, C) \
+ (__m512d)__builtin_ia32_divpd512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_div_round_pd(U, A, B, C) \
+ (__m512d)__builtin_ia32_divpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_div_round_ps(A, B, C) \
+ (__m512)__builtin_ia32_divps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_div_round_ps(W, U, A, B, C) \
+ (__m512)__builtin_ia32_divps512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_div_round_ps(U, A, B, C) \
+ (__m512)__builtin_ia32_divps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm_mul_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_mul_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_mul_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_mul_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_mulss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_mul_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_mulss_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_mul_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_mulss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+
+#define _mm_div_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_divsd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_div_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_divsd_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_div_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_divsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_div_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_divss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_div_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_divss_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_div_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_divss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+#endif
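+
+/* mul/div follow the same pattern, e.g. division performed with
+   round-to-nearest and exceptions suppressed.  Sketch:
+
+     __m512d q = _mm512_div_round_pd (a, b, _MM_FROUND_TO_NEAREST_INT
+                                            | _MM_FROUND_NO_EXC);  */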
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+#else
+#define _mm512_max_round_pd(A, B, R) \
+ (__m512d)__builtin_ia32_maxpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (R))
+
+#define _mm512_mask_max_round_pd(W, U, A, B, R) \
+ (__m512d)__builtin_ia32_maxpd512_mask((A), (B), (W), (U), (R))
+
+#define _mm512_maskz_max_round_pd(U, A, B, R) \
+ (__m512d)__builtin_ia32_maxpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (R))
+
+#define _mm512_max_round_ps(A, B, R) \
+ (__m512)__builtin_ia32_maxps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (R))
+
+#define _mm512_mask_max_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_maxps512_mask((A), (B), (W), (U), (R))
+
+#define _mm512_maskz_max_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_maxps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (R))
+
+#define _mm512_min_round_pd(A, B, R) \
+ (__m512d)__builtin_ia32_minpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (R))
+
+#define _mm512_mask_min_round_pd(W, U, A, B, R) \
+ (__m512d)__builtin_ia32_minpd512_mask((A), (B), (W), (U), (R))
+
+#define _mm512_maskz_min_round_pd(U, A, B, R) \
+ (__m512d)__builtin_ia32_minpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (R))
+
+#define _mm512_min_round_ps(A, B, R) \
+ (__m512)__builtin_ia32_minps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (R))
+
+#define _mm512_mask_min_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_minps512_mask((A), (B), (W), (U), (R))
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_minps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (R))
+#endif
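+
+/* Usage sketch (editorial illustration only, not part of the patch):
+   the _mask variants keep SRC elements where the mask bit is clear and
+   otherwise take the packed maximum under the given rounding control.
+   The function and operand names below are hypothetical.
+
+     #include <immintrin.h>
+
+     __m512d
+     masked_max (__m512d src, __mmask8 m, __m512d a, __m512d b)
+     {
+       return _mm512_mask_max_round_pd (src, m, a, b,
+                                        _MM_FROUND_CUR_DIRECTION);
+     }
+*/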
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm512_scalef_round_pd(A, B, C) \
+ (__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), -1, (C))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+ (__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+ (__m512d)__builtin_ia32_scalefpd512_mask((A), (B), (__v8df)_mm512_setzero_pd(), (U), (C))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+ (__m512)__builtin_ia32_scalefps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), -1, (C))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+ (__m512)__builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+ (__m512)__builtin_ia32_scalefps512_mask((A), (B), (__v16sf)_mm512_setzero_ps(), (U), (C))
+
+#define _mm_scalef_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_scalefsd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_scalefsd_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_scalefsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+
+#define _mm_scalef_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_scalefss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_scalefss_mask((A), (B), (W), (U), (C))
+
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_scalefss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+#endif
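+
+/* Usage sketch (editorial illustration only): VSCALEF computes
+   A * 2^floor(B) per element, so a vectorized ldexp-like helper can be
+   written directly on top of it.  The name vector_ldexp is hypothetical;
+   E is expected to hold integral values, as for ldexp.
+
+     #include <immintrin.h>
+
+     __m512d
+     vector_ldexp (__m512d x, __m512d e)
+     {
+       return _mm512_scalef_round_pd (x, e, _MM_FROUND_CUR_DIRECTION);
+     }
+*/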
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U, const int __R)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C, const int __R)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ __R);
+}
+#else
+#define _mm512_fmadd_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((A), (B), (C), (U), (R))
+
+#define _mm512_fmadd_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_maskz((A), (B), (C), (U), (R))
+
+#define _mm512_fmsub_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfmsubpd512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((A), (B), -(C), (U), (R))
+
+#define _mm512_fmsub_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfmsubps512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_maskz((A), (B), -(C), (U), (R))
+
+#define _mm512_fmaddsub_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((A), (B), (C), (U), (R))
+
+#define _mm512_fmaddsub_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), (C), -1, (R))
+
+#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((A), (B), (C), (U), (R))
+
+#define _mm512_fmsubadd_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((A), (B), -(C), (U), (R))
+
+#define _mm512_fmsubadd_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((A), (B), -(C), (U), (R))
+
+#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfmsubaddps512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((A), (B), -(C), (U), (R))
+
+#define _mm512_fnmadd_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), (B), (C), -1, (R))
+
+#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfnmaddpd512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), (B), (C), (U), (R))
+
+#define _mm512_fnmadd_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask(-(A), (B), (C), -1, (R))
+
+#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfnmaddps512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), (B), (C), (U), (R))
+
+#define _mm512_fnmsub_round_pd(A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
+ (__m512d)__builtin_ia32_vfnmsubpd512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
+ (__m512d)__builtin_ia32_vfnmsubpd512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), (B), -(C), (U), (R))
+
+#define _mm512_fnmsub_round_ps(A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_mask(-(A), (B), -(C), -1, (R))
+
+#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
+ (__m512)__builtin_ia32_vfnmsubps512_mask((A), (B), (C), (U), (R))
+
+#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
+ (__m512)__builtin_ia32_vfnmsubps512_mask3((A), (B), (C), (U), (R))
+
+#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), (B), -(C), (U), (R))
+#endif
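+
+/* Usage sketch (editorial illustration only): the three masked forms
+   differ in which operand survives where a mask bit is clear -- _mask
+   keeps the first operand, _mask3 keeps the addend, _maskz zeroes.  A
+   hypothetical masked multiply-accumulate using the _mask3 form:
+
+     #include <immintrin.h>
+
+     __m512
+     fma_into_acc (__mmask16 m, __m512 a, __m512 x, __m512 acc)
+     {
+       return _mm512_mask3_fmadd_round_ps (a, x, acc, m,
+                                           _MM_FROUND_TO_NEAREST_INT
+                                           | _MM_FROUND_NO_EXC);
+     }
+*/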
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi64 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi32 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
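+
+/* Usage sketch (editorial illustration only): zero-masking keeps |v|
+   only in the selected lanes.  The constant 0x0F selects the low four
+   of the eight 64-bit elements; the function name is hypothetical.
+
+     #include <immintrin.h>
+
+     __m512i
+     abs_low_quads (__m512i v)
+     {
+       return _mm512_maskz_abs_epi64 ((__mmask8) 0x0F, v);
+     }
+*/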
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastss_ps (__m128 __A)
+{
+ __v16sf __O;
+ return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
+ (__v16sf) __O, __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastsd_pd (__m128d __A)
+{
+ __v8df __O;
+ return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
+ (__v8df) __O, __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastd_epi32 (__m128i __A)
+{
+ __v16si __O;
+ return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
+ (__v16si) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi32 (int __A)
+{
+ __v16si __O;
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
+ (__mmask16)(-1));
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastq_epi64 (__m128i __A)
+{
+ __v8di __O;
+ return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
+ (__v8di) __O, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi64 (long long __A)
+{
+ __v8di __O;
+#ifdef __x86_64__
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
+ (__mmask8)(-1));
+#else
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
+ (__mmask8)(-1));
+#endif
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
+ __M);
+#else
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
+ __M);
+#endif
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+#else
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+#endif
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x4 (__m128 __A)
+{
+ __v16sf __O;
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf) __O,
+ __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x4 (__m128i __A)
+{
+ __v16si __O;
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ __O,
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ (__v16si) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f64x4 (__m256d __A)
+{
+ __v8df __O;
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ __O,
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ (__v8df) __O,
+ __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i64x4 (__m256i __A)
+{
+ __v8di __O;
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ __O,
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ (__v8di) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
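+
+/* Usage sketch (editorial illustration only): the x32x4/x64x4 forms
+   replicate a whole 128-bit (resp. 256-bit) source across the 512-bit
+   destination.  The function name is hypothetical.
+
+     #include <immintrin.h>
+
+     __m512
+     tile_vec4 (__m128 v)
+     {
+       return _mm512_broadcast_f32x4 (v);
+     }
+*/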
+
+typedef enum
+{
+ _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
+ _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
+ _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
+ _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
+ _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
+ _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
+ _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
+ _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
+ _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
+ _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
+ _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
+ _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
+ _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
+ _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
+ _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
+ _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
+ _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
+ _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
+ _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
+ _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
+ _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
+ _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
+ _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
+ _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
+ _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
+ _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
+ _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
+ _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
+ _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
+ _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
+ _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
+ _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
+ _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
+ _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
+ _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
+ _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
+ _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
+ _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
+ _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
+ _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
+ _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
+ _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
+ _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
+ _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
+ _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
+ _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
+ _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
+ _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
+ _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
+ _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
+ _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
+ _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
+ _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
+ _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
+ _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
+ _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
+ _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
+ _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
+ _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
+ _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
+ _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
+ _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
+ _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
+ _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
+ _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
+ _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
+ _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
+ _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
+ _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
+ _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
+ _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
+ _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
+ _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
+ _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
+ _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
+ _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
+ _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
+ _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
+ _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
+ _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
+ _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
+ _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
+ _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
+ _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
+ _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
+ _MM_PERM_DDDD = 0xFF
+} _MM_PERM_ENUM;
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
+ __mask,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
+ (__v16si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
+ (__v8df) __B, __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
+ (__v16sf) __B, __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+#else
+#define _mm512_shuffle_epi32(X, C) \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_shuffle_i64x2(X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C),\
+ (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+
+#define _mm512_shuffle_i32x4(X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_shuffle_f64x2(X, Y, C) \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
+ ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm512_shuffle_f32x4(X, Y, C) \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
+ ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)(U)))
+#endif
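+
+/* Usage sketch (editorial illustration only): _MM_PERM_ENUM spells the
+   pshufd-style immediate, with A..D denoting elements 0..3 of each
+   128-bit lane; _MM_PERM_DCBA (0xE4) is the identity.  A hypothetical
+   splat of element 0 within every lane:
+
+     #include <immintrin.h>
+
+     __m512i
+     splat_lane_elem0 (__m512i v)
+     {
+       return _mm512_shuffle_epi32 (v, _MM_PERM_AAAA);
+     }
+*/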
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rolv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rorv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rolv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rorv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
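+
+/* Usage sketch (editorial illustration only): the variable rotates take
+   a per-element count vector, so a uniform rotate is just a splat of the
+   count; the hardware uses the count modulo the element width.  The
+   function name is hypothetical.
+
+     #include <immintrin.h>
+
+     __m512i
+     rotl32_all (__m512i v, int n)
+     {
+       return _mm512_rolv_epi32 (v, _mm512_set1_epi32 (n));
+     }
+*/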
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_cvtt_roundpd_epi32(A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((A), (__v8si)_mm256_setzero_si256(), -1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((A), (__v8si)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))
+
+#define _mm512_cvtt_roundpd_epu32(A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((A), (__v8si)_mm256_setzero_si256(), -1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((A), (__v8si)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))
+#endif
+
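+/* Rounding conversions from packed double to packed (unsigned) 32-bit
+ integers; unlike the cvtt forms above, these honor the rounding mode in
+ __R, e.g. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC. */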
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_cvt_roundpd_epi32(A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)_mm256_setzero_si256(), -1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))
+
+#define _mm512_cvt_roundpd_epu32(A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)_mm256_setzero_si256(), -1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))
+#endif
+
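+/* Truncating conversions from packed single to packed (unsigned) 32-bit
+ integers; sixteen elements, hence the __mmask16 write-masks. */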
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+#else
+#define _mm512_cvtt_roundps_epi32(A, B) \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvtt_roundps_epu32(A, B) \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
+#endif
+
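+/* Rounding conversions from packed single to packed (unsigned) 32-bit
+ integers, the sixteen-element counterparts of the pd forms above. */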
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+#else
+#define _mm512_cvt_roundps_epi32(A, B) \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvt_roundps_epu32(A, B) \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
+#endif
+
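+/* Scalar integer to low-element conversions. A 32-bit integer always
+ converts to double exactly, so _mm_cvtu32_sd needs no rounding operand;
+ the conversions to float and the 64-bit source forms can be inexact and
+ take __R. The 64-bit forms exist only for __x86_64__. */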
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sd (__m128d __A, unsigned __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtusi2sd64((A), (B), (C))
+
+#define _mm_cvt_roundi64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtsi2sd64((A), (B), (C))
+
+#define _mm_cvt_roundsi64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtsi2sd64((A), (B), (C))
+#endif
+
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtusi2ss32((A), (B), (C))
+
+#define _mm_cvt_roundi32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss32((A), (B), (C))
+
+#define _mm_cvt_roundsi32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss32((A), (B), (C))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtusi2ss64((A), (B), (C))
+
+#define _mm_cvt_roundi64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss64((A), (B), (C))
+
+#define _mm_cvt_roundsi64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss64((A), (B), (C))
+#endif
+
+#endif
+
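+/* Down-converting moves (VPMOV*): narrow each element by plain
+ truncation, signed saturation (cvtsepi*) or unsigned saturation
+ (cvtusepi*). The unmasked forms pass a zero vector as the merge
+ operand; the all-ones mask makes it irrelevant. Example (sketch):
+ __m128i b = _mm512_cvtsepi32_epi8 (v); packs sixteen ints to sixteen
+ signed-saturated bytes. */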
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
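+/* Widening conversions: every (unsigned) 32-bit integer is exactly
+ representable as a double, so no rounding operand is needed here. */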
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_pd (__m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_pd (__m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
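+/* Integer to single conversions can be inexact for magnitudes of 2^24
+ and above, so these take an explicit rounding operand like the pd
+ conversions earlier. */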
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+#else
+#define _mm512_cvt_roundepi32_ps(A, B) \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, (B))
+
+#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (W), (U), (B))
+
+#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (U), (B))
+
+#define _mm512_cvt_roundepu32_ps(A, B) \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, (B))
+
+#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (W), (U), (B))
+
+#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (U), (B))
+#endif
+
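+/* Lane extraction: __imm selects one of the two 256-bit halves
+ (extract*64x4) or one of the four 128-bit lanes (extract*32x4) of the
+ source; it must be a compile-time constant. */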
+#ifdef __OPTIMIZE__
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf64x4_pd (__m512d __A, const int __imm)
+{
+ return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
+ __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf32x4_ps (__m512 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
+ __imm,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
+ __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
+ __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+#else
+
+#define _mm512_extractf64x4_pd(X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extractf64x4_pd(U, X, C) \
+ ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
+ (int) (C),\
+ (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm512_extractf32x4_ps(X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)_mm_setzero_ps(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extractf32x4_ps(U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
+ (int) (C),\
+ (__v4sf)(__m128)_mm_setzero_ps(),\
+ (__mmask8)(U)))
+
+#define _mm512_extracti64x4_epi64(X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
+ ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
+ (int) (C),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm512_extracti32x4_epi32(X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)-1))
+
+#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
+ (int) (C),\
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)(U)))
+#endif
+
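+/* Lane insertion, the converse of the extractions above: __B replaces
+ the 128-bit or 256-bit lane of __A selected by __imm. */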
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v16si) __A, -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v16sf) __A, -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
+ __m256d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_insertf32x4(X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
+
+#define _mm512_inserti32x4(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+ (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
+
+#define _mm512_insertf64x4(X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)))
+
+#define _mm512_maskz_insertf64x4(U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
+ (__v4df)(__m256d) (Y), (int) (C), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm512_inserti64x4(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
+ (__mmask8)-1))
+
+#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C),\
+ (__v8di)(__m512i)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_inserti64x4(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
+ (__v4di)(__m256i) (Y), (int) (C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+#endif
+
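+/* Unaligned loads and stores. Masked-off elements are neither read nor
+ written and cannot fault, so a partial mask can safely cover a buffer
+ tail. Example (sketch), for n < 8 remaining doubles:
+ __mmask8 k = (1U << n) - 1;
+ __m512d v = _mm512_maskz_loadu_pd (k, p); */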
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_pd (void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_pd (void *__P, __m512d __A)
+{
+ __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+ __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_ps (void const *__P)
+{
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_ps (void *__P, __m512 __A)
+{
+ __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
+ (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+ __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+ __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_si512 (void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+ __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
+ (__mmask16) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+ __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
+ (__mmask16) __U);
+}
+
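+/* VPERMILVAR: permute elements within each 128-bit lane of __A, using
+ the low bits of the corresponding control element in __C. */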
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutevar_pd (__m512d __A, __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutevar_ps (__m512 __A, __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf) __W,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16)
+ __U);
+}
+
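+/* Two-source full permute (VPERMT2 / VPERMI2): the index vector __I
+ selects elements from the concatenation of __A and __B. The _mask_
+ form copies masked-off elements from __A, the _mask2_ form from the
+ index operand __I. */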
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I /* idx */,
+ (__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I /* idx */,
+ (__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
+ __mmask8 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
+ (__v8di) __I /* idx */,
+ (__v8di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I /* idx */,
+ (__v8di) __A,
+ (__v8di) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I /* idx */,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I /* idx */,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
+ __mmask16 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
+ (__v16si) __I /* idx */,
+ (__v16si) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I /* idx */,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I /* idx */,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I /* idx */,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
+ (__v8di) __I /* idx */,
+ (__v8df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I /* idx */,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I /* idx */,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I /* idx */,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
+ (__v16si) __I /* idx */,
+ (__v16sf) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I /* idx */,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16)
+ __U);
+}
+
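+/* VPERMILPD/VPERMILPS with an immediate control: permute elements
+ within each 128-bit lane according to __C. */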
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permute_pd (__m512d __X, const int __C)
+{
+ return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
+{
+ return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permute_ps (__m512 __X, const int __C)
+{
+ return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
+{
+ return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+#else
+#define _mm512_permute_pd(X, C) \
+ ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
+ (__v8df)(__m512d)(X), \
+ (__mmask8)(-1)))
+
+#define _mm512_mask_permute_pd(W, U, X, C) \
+ ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)))
+
+#define _mm512_maskz_permute_pd(U, X, C) \
+ ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm512_permute_ps(X, C) \
+ ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
+ (__v16sf)(__m512)(X), \
+ (__mmask16)(-1)))
+
+#define _mm512_mask_permute_ps(W, U, X, C) \
+ ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U)))
+
+#define _mm512_maskz_permute_ps(U, X, C) \
+ ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)(U)))
+#endif
+
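+/* VPERMQ/VPERMPD immediate forms: permute 64-bit elements using the
+ two-bit index fields of the immediate, applied within each 256-bit
+ half. */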
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex_epi64 (__m512i __X, const int __I)
+{
+ return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X,
+ __I,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
+ __m512i __X, const int __I)
+{
+ return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X,
+ __I,
+ (__v8di) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
+{
+ return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X,
+ __I,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex_pd (__m512d __X, const int __M)
+{
+ return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X,
+ const int __M)
+{
+ return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
+{
+ return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_permutex_pd(X, M) \
+ ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
+ (__v8df)(__m512d)(X), (__mmask8)-1))
+
+#define _mm512_mask_permutex_pd(W, U, X, M) \
+ ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
+ (__v8df)(__m512d)(W), (__mmask8)(U)))
+
+#define _mm512_maskz_permutex_pd(U, X, M) \
+ ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm512_permutex_epi64(X, I) \
+ ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
+ (int)(I), \
+ (__v8di)(__m512i)(X), \
+ (__mmask8)(-1)))
+
+#define _mm512_maskz_permutex_epi64(M, X, I) \
+ ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
+ (int)(I), \
+ (__v8di)(__m512i) \
+ (_mm512_setzero_si512 ()),\
+ (__mmask8)(M)))
+
+#define _mm512_mask_permutex_epi64(W, M, X, I) \
+ ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
+ (int)(I), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(M)))
+#endif
+
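+/* VPERM* with a vector of indices: a full cross-lane shuffle. Note the
+ operand order of the pd/ps forms: the index vector __X comes first,
+ the data __Y second. */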
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+ (__v8di) __X,
+ (__v8di) __W,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si) __W,
+ __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X,
+ __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X,
+ __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
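+
+/* Usage sketch (editorial illustration): the permutexvar forms take a
+   vector of indices instead of an immediate and select across the full
+   512-bit register; as in the floating-point variants, the first
+   argument is the index vector.  Values here are hypothetical.
+
+     __m512i v   = _mm512_set_epi64 (7, 6, 5, 4, 3, 2, 1, 0);
+     __m512i idx = _mm512_set_epi64 (0, 1, 2, 3, 4, 5, 6, 7);
+     __m512i rev = _mm512_permutexvar_epi64 (idx, v);
+
+   rev[i] = v[idx[i]], i.e. the eight qwords of v reversed.  */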
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
+{
+ return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
+ (__v16sf) __V, __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
+ __m512 __V, const int __imm)
+{
+ return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
+ (__v16sf) __V, __imm,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
+ (__v16sf) __V, __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
+ (__v8df) __V, __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
+ __m512d __V, const int __imm)
+{
+ return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
+ (__v8df) __V, __imm,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
+ (__v8df) __V, __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
+ const int __imm, const int __R)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512i __C, const int __imm,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512i __C, const int __imm,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
+ const int __imm, const int __R)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512i __C, const int __imm,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512i __C, const int __imm,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
+ const int __imm, const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
+ const int __imm, const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ __R);
+}
+
+#else
+#define _mm512_shuffle_pd(X, Y, C) \
+ ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)-1))
+
+#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
+ ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)(W),\
+ (__mmask8)(U)))
+
+#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
+ ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(C),\
+ (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm512_shuffle_ps(X, Y, C) \
+ ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)-1))
+
+#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
+ ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
+ ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)(U)))
+
+#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(-1), (R)))
+
+#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(-1), (R)))
+
+#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), (R)))
+
+#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
+ ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), (R)))
+
+#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+#endif
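+
+/* Usage sketch (editorial illustration): _mm512_shuffle_ps follows the
+   classic shufps encoding per 128-bit lane: the two low dwords of each
+   destination lane come from the first operand and the two high dwords
+   from the second, selected by the immediate.  _MM_SHUFFLE is from
+   xmmintrin.h; a and b are hypothetical __m512 values.
+
+     __m512 r = _mm512_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));
+
+   Per lane, r = { a0, a1, b2, b3 }.  */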
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movehdup_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_moveldup_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
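+
+/* Usage sketch (editorial illustration): movehdup duplicates each
+   odd-indexed float into the even slot below it (dst[2i] = dst[2i+1]
+   = src[2i+1]); moveldup does the same with the even-indexed floats.
+   v is a hypothetical __m512.
+
+     __m512 hi = _mm512_movehdup_ps (v);
+     __m512 lo = _mm512_moveldup_ps (v);
+
+   Pairing the two with add/sub is the usual idiom for interleaved
+   complex arithmetic.  */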
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
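+
+/* Usage sketch (editorial illustration): the three masking flavours of
+   the bitwise operations differ only in what happens to lanes whose
+   mask bit is clear.  With hypothetical vectors w, a, b and the mask
+   0x00FF:
+
+     __m512i m = _mm512_mask_or_epi32 (w, 0x00FF, a, b);
+     __m512i z = _mm512_maskz_or_epi32 (0x00FF, a, b);
+
+   m keeps the corresponding element of w in the high eight lanes,
+   z zeroes them, and the plain _mm512_or_epi32 writes all sixteen.  */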
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rol_epi32 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ const int __B)
+{
+ return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ror_epi32 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ const int __B)
+{
+ return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rol_epi64 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ror_epi64 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#else
+#define _mm512_rol_epi32(A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(-1)))
+#define _mm512_mask_rol_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+#define _mm512_maskz_rol_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(U)))
+#define _mm512_ror_epi32(A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(-1)))
+#define _mm512_mask_ror_epi32(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+#define _mm512_maskz_ror_epi32(U, A, B) \
+ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
+ (int)(B), \
+ (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(U)))
+#define _mm512_rol_epi64(A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(-1)))
+#define _mm512_mask_rol_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)))
+#define _mm512_maskz_rol_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+
+#define _mm512_ror_epi64(A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(-1)))
+#define _mm512_mask_ror_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)))
+#define _mm512_maskz_ror_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
+ (int)(B), \
+ (__v8di)_mm512_setzero_si512 (), \
+ (__mmask8)(U)))
+#endif
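+
+/* Usage sketch (editorial illustration): the rotate intrinsics require
+   a compile-time constant count, hence the macro fallback when
+   __OPTIMIZE__ is not defined.  Rotating each dword of a hypothetical
+   v left by 8:
+
+     __m512i r = _mm512_rol_epi32 (v, 8);
+
+   An element 0x11223344 becomes 0x22334411; _mm512_ror_epi32 (v, 24)
+   would produce the same result.  */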
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+ (__v16si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
+ (__v8di) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
+ (__v16si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
+ (__v8di) __B, __U);
+}
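+
+/* Usage sketch (editorial illustration): vptestm sets mask bit i when
+   (A[i] & B[i]) != 0, vptestnm when that AND is zero, so testing a
+   hypothetical v against itself yields its nonzero-element mask:
+
+     __mmask16 nonzero = _mm512_test_epi32_mask (v, v);
+     __mmask16 zero    = _mm512_testn_epi32_mask (v, v);
+
+   The two masks are complementary.  */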
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
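+
+/* Usage sketch (editorial illustration): as with the SSE/AVX forms,
+   the unpacks interleave within each 128-bit lane.  For hypothetical
+   a and b, per lane unpacklo_epi32 produces { a0, b0, a1, b1 } and
+   unpackhi_epi32 produces { a2, b2, a3, b3 }:
+
+     __m512i lo = _mm512_unpacklo_epi32 (a, b);
+     __m512i hi = _mm512_unpackhi_epi32 (a, b);
+*/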
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
+ __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
+ __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+#else
+#define _mm_cvt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtss2usi64((A), (B)))
+
+#define _mm_cvt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64((A), (B)))
+
+#define _mm_cvt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64((A), (B)))
+
+#define _mm_cvtt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttss2usi64((A), (B)))
+
+#define _mm_cvtt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64((A), (B)))
+
+#define _mm_cvtt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64((A), (B)))
+#endif
+#endif
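+
+/* Usage sketch (editorial illustration): on x86-64, the _round scalar
+   conversions take an explicit rounding operand built from the
+   _MM_FROUND_* constants in smmintrin.h; the truncating cvtt forms
+   accept only the suppress-all-exceptions flag.  x is a hypothetical
+   __m128.
+
+     unsigned long long u =
+       _mm_cvt_roundss_u64 (x, _MM_FROUND_TO_NEAREST_INT
+                               | _MM_FROUND_NO_EXC);
+     long long t = _mm_cvtt_roundss_si64 (x, _MM_FROUND_NO_EXC);
+*/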
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u32 (__m128 __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+#else
+#define _mm_cvt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtss2usi32((A), (B)))
+
+#define _mm_cvt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32((A), (B)))
+
+#define _mm_cvt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32((A), (B)))
+
+#define _mm_cvtt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttss2usi32((A), (B)))
+
+#define _mm_cvtt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32((A), (B)))
+
+#define _mm_cvtt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32((A), (B)))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
+ __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
+ __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+#else
+#define _mm_cvt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtsd2usi64((A), (B)))
+
+#define _mm_cvt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64((A), (B)))
+
+#define _mm_cvt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64((A), (B)))
+
+#define _mm_cvtt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttsd2usi64((A), (B)))
+
+#define _mm_cvtt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64((A), (B)))
+
+#define _mm_cvtt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64((A), (B)))
+#endif
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+#else
+#define _mm_cvt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtsd2usi32((A), (B)))
+
+#define _mm_cvt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32((A), (B)))
+
+#define _mm_cvt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32((A), (B)))
+
+#define _mm_cvtt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttsd2usi32((A), (B)))
+
+#define _mm_cvtt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32((A), (B)))
+
+#define _mm_cvtt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32((A), (B)))
+#endif
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movedup_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_pd (__m256 __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_ps (__m256i __A, const int __R)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_ph (__m512 __A, const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_ph (__m512 __A, const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
+ const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi) __U,
+ (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A,
+ const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi) __U,
+ (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __W);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
+{
+ return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+ __I,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __W);
+}
+#else
+#define _mm512_cvt_roundps_pd(A, B) \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)_mm512_setzero_pd(), -1, (B))
+
+#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)(W), (U), (B))
+
+#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((A), (__v8df)_mm512_setzero_pd(), (U), (B))
+
+#define _mm512_cvt_roundph_ps(A, B) \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, (B))
+
+#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), (U), (B))
+
+#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), (U), (B))
+
+#define _mm512_cvt_roundps_ph(A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), -1))
+#define _mm512_cvtps_ph(A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), -1))
+#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)(__m256i)(U), (__mmask16) (W)))
+#define _mm512_mask_cvtps_ph(U, W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)(__m256i)(U), (__mmask16) (W)))
+#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
+#define _mm512_maskz_cvtps_ph(W, A, I) \
+ ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A), (int) (I),\
+ (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
+#endif
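+
+/* Usage sketch (editorial illustration): a float16 round trip; the
+   imm8 of cvtps_ph selects the rounding mode.  This assumes the
+   _mm512_cvtph_ps wrapper defined elsewhere in this header; v is a
+   hypothetical __m512.
+
+     __m256i half = _mm512_cvtps_ph (v, _MM_FROUND_TO_NEAREST_INT);
+     __m512  back = _mm512_cvtph_ps (half);
+
+   back equals v up to half-precision rounding.  */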
+
+#ifdef __OPTIMIZE__
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A, __m128d __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A, __m128 __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm512_cvt_roundpd_ps(A, B) \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)_mm256_setzero_ps(), -1, (B))
+
+#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)(W), (U), (B))
+
+#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((A), (__v8sf)_mm256_setzero_ps(), (U), (B))
+
+#define _mm_cvt_roundsd_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask((A), (B), (__v4sf)_mm_setzero_ps(), -1, (C))
+
+#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask((A), (B), (__v4sf)(W), (U), (C))
+
+#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (U), (C))
+
+#define _mm_cvt_roundss_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask((A), (B), (__v2df)_mm_setzero_pd(), -1, (C))
+
+#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask((A), (B), (__v2df)(W), (U), (C))
+
+#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask((A), (B), (__v2df)_mm_setzero_pd(), (U), (C))
+#endif
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_si512 (__m512i * __P, __m512i __A)
+{
+ __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_ps (float *__P, __m512 __A)
+{
+ __builtin_ia32_movntps512 (__P, (__v16sf) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_stream_pd (double *__P, __m512d __A)
+{
+ __builtin_ia32_movntpd512 (__P, (__v8df) __A);
+}
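+
+/* Usage sketch (editorial illustration): these stores are
+   non-temporal, so the destination must be 64-byte aligned, and an
+   _mm_sfence (from xmmintrin.h) is needed before another thread may
+   rely on the stored data.  v is a hypothetical __m512.
+
+     float dst[16] __attribute__ ((aligned (64)));
+     _mm512_stream_ps (dst, v);
+     _mm_sfence ();
+*/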
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __round)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __round)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ __round);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __round)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __round)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __round)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __round)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_ps (__m512 __A, const int __round)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ const int __round)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A,
+ const int __round)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __round);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_pd (__m512d __A, const int __round)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ const int __round)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __round);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A,
+ const int __round)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __round);
+}
+
+/* Constants for mantissa extraction.  */
+typedef enum
+{
+ _MM_MANT_NORM_1_2, /* interval [1, 2) */
+ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
+ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
+ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
+} _MM_MANTISSA_NORM_ENUM;
+
+typedef enum
+{
+ _MM_MANT_SIGN_src, /* sign = sign(SRC) */
+ _MM_MANT_SIGN_zero, /* sign = 0 */
+ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
+} _MM_MANTISSA_SIGN_ENUM;
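+
+/* The getmant functions below pack the sign control into bits 3:2 of
+   the immediate and the normalization interval into bits 1:0, i.e.
+   (__C << 2) | __B.  For example, _MM_MANT_SIGN_zero (1) combined
+   with _MM_MANT_NORM_p5_1 (2) encodes as (1 << 2) | 2 = 6.  */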
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C, const int __round)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C,
+ const int __round)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df) __W, __U,
+ __round);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C,
+ const int __round)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U, __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C, const int __round)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C,
+ const int __round)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf) __W, __U,
+ __round);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C,
+ const int __round)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U, __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_sd (__m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __round)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D,
+ const int __round)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df) __W, __U,
+ __round);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D,
+ const int __round)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ __U, __round);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_ss (__m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __round)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) -1,
+ __round);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D,
+ const int __round)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf) __W, __U,
+ __round);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D,
+ const int __round)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __U, __round);
+}
+
+#else
+#define _mm512_getmant_round_pd(X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)-1,\
+ (R)))
+
+#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U),\
+ (R)))
+#define _mm512_getmant_round_ps(X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)-1,\
+ (R)))
+
+#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U),\
+ (R)))
+
+#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)(U),\
+ (R)))
+#define _mm_getmant_round_sd(X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)-1,\
+ (R)))
+
+#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U),\
+ (R)))
+#define _mm_getmant_round_ss(X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1,\
+ (R)))
+
+#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_getexp_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (R)))
+
+#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), (R)))
+
+#define _mm_maskz_getexp_round_ss(U, A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (R)))
+
+#define _mm_getexp_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)_mm_setzero_pd(), (__mmask8)-1, (R)))
+
+#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)(__m128d)(W), (__mmask8)(U), (R)))
+
+#define _mm_maskz_getexp_round_sd(U, A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)_mm_setzero_pd(), (__mmask8)(U), (R)))
+
+#define _mm512_getexp_round_ps(A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
+
+#define _mm512_mask_getexp_round_ps(W, U, A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
+
+#define _mm512_maskz_getexp_round_ps(U, A, R) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
+
+#define _mm512_getexp_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
+
+#define _mm512_mask_getexp_round_pd(W, U, A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
+
+#define _mm512_maskz_getexp_round_pd(U, A, R) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
+#endif
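+
+/* Illustrative semantics, not part of the patch: per element, getexp
+   computes floor (log2 (|x|)) and getmant returns the mantissa scaled
+   into the requested interval; e.g. for x = 8.0, getexp yields 3.0
+   and getmant with _MM_MANT_NORM_1_2 yields 1.0.  */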
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
+ (__v16sf) __A, -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
+ const int __imm, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
+ (__v16sf) __A,
+ (__mmask16) __B,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
+ const int __imm, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __A,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_pd (__m512d __A, const int __imm,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
+ (__v8df) __A, -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
+ __m512d __C, const int __imm,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
+ (__v8df) __A,
+ (__mmask8) __B,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
+ const int __imm, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __A,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __imm,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_ss (__mmask8 __U, __m128 __A,
+ __m128 __B, const int __imm,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_sd (__mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_roundscale_round_ps(A, B, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
+ (__v16sf)(__m512)(A), (__mmask16)(-1), (R)))
+#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
+ (int)(D), \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(B), (R)))
+#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
+ (int)(C), \
+ (__v16sf)_mm512_setzero_ps(),\
+ (__mmask16)(A), (R)))
+#define _mm512_roundscale_round_pd(A, B, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
+ (__v8df)(__m512d)(A), (__mmask8)(-1), (R)))
+#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
+ (int)(D), \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(B), (R)))
+#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
+ (int)(C), \
+ (__v8df)_mm512_setzero_pd(),\
+ (__mmask8)(A), (R)))
+#define _mm_roundscale_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(-1), (R)))
+#define _mm_mask_roundscale_round_ss(W, U, A, B, C, R) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W),(__mmask8)(U), (R)))
+#define _mm_maskz_roundscale_round_ss(U, A, B, C, R) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (R)))
+#define _mm_roundscale_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(-1), (R)))
+#define _mm_mask_roundscale_round_sd(W, U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U), (R)))
+#define _mm_maskz_roundscale_round_sd(U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (R)))
+#endif
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_floor_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_floor_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ceil_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ceil_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_round_ps (__m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf) __A, -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_floor_round_pd (__m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df) __A, -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_round_ps (__m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf) __A, -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ceil_round_pd (__m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df) __A, -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf) __W, __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_floor_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df) __W, __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf) __W, __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ceil_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df) __W, __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_floor_round_ps (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_floor_round_pd (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ceil_round_ps (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ceil_round_pd (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U, __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
+ (__v16si) __B, __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
+ (__v16si) __B, __imm,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
+ (__v16si) __B, __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
+ (__v8di) __B, __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+#else
+#define _mm512_floor_round_ps(A, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \
+ _MM_FROUND_FLOOR, \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(-1), (R)))
+#define _mm512_mask_floor_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
+ _MM_FROUND_FLOOR, \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(B), (R)))
+#define _mm512_maskz_floor_round_ps(A, B, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
+ _MM_FROUND_FLOOR, \
+ (__v16sf)_mm512_setzero_ps(),\
+ (__mmask16)(A), (R)))
+#define _mm512_floor_round_pd(A, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \
+ _MM_FROUND_FLOOR, \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(-1), (R)))
+#define _mm512_mask_floor_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
+ _MM_FROUND_FLOOR, \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(B), (R)))
+#define _mm512_maskz_floor_round_pd(A, B, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
+ _MM_FROUND_FLOOR, \
+ (__v8df)_mm512_setzero_pd(),\
+ (__mmask8)(A), (R)))
+#define _mm512_ceil_round_ps(A, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), \
+ _MM_FROUND_CEIL, \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(-1), (R)))
+#define _mm512_mask_ceil_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
+ _MM_FROUND_CEIL, \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(B), (R)))
+#define _mm512_maskz_ceil_round_ps(A, B, R) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
+ _MM_FROUND_CEIL, \
+ (__v16sf)_mm512_setzero_ps(),\
+ (__mmask16)(A), (R)))
+#define _mm512_ceil_round_pd(A, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), \
+ _MM_FROUND_CEIL, \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(-1), (R)))
+#define _mm512_mask_ceil_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
+ _MM_FROUND_CEIL, \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(B), (R)))
+#define _mm512_maskz_ceil_round_pd(A, B, R) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
+ _MM_FROUND_CEIL, \
+ (__v8df)_mm512_setzero_pd(),\
+ (__mmask8)(A), (R)))
+
+#define _mm512_alignr_epi32(X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \
+ (__mmask16)-1))
+
+#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
+ (__mmask16)(U)))
+
+#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm512_alignr_epi64(X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1))
+
+#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
+
+#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask8)(U)))
+#endif
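+
+/* Illustrative semantics, not part of the patch: valignd/valignq
+   concatenate A (high) with B (low), shift the pair right by __imm
+   elements and keep the low half, so _mm512_alignr_epi32 (a, b, 4)
+   yields elements 4..15 of b followed by elements 0..3 of a.  */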
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
+ (__v8di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
+ (__v8di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+#define _MM_CMPINT_EQ 0x0
+#define _MM_CMPINT_LT 0x1
+#define _MM_CMPINT_LE 0x2
+#define _MM_CMPINT_UNUSED 0x3
+#define _MM_CMPINT_NE 0x4
+#define _MM_CMPINT_NLT 0x5
+#define _MM_CMPINT_GE 0x5
+#define _MM_CMPINT_NLE 0x6
+#define _MM_CMPINT_GT 0x6
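+
+/* Illustrative usage, not part of the patch: the predicates above
+   select the comparison done by the _mm512_cmp_* family, e.g.
+
+     __mmask16 m = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_LE);
+
+   sets bit i of m when a[i] <= b[i].  */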
+
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
+ (__v8di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
+ (__v16si) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
+ (__v8di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
+ (__v16si) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
+ const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, __P,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P,
+ const int __R)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, __P,
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
+ (__v8di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
+ (__v16si) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
+ (__v8di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
+ (__v16si) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
+ const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, __P,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
+ const int __P, const int __R)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, __P,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P,
+ const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
+ const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __M, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P,
+ const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
+ const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __M, __R);
+}
+
+#else
+#define _mm512_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm512_cmp_epi32_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P),\
+ (__mmask16)-1))
+
+#define _mm512_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm512_cmp_epu32_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P),\
+ (__mmask16)-1))
+
+#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)-1,(R)))
+
+#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)-1,(R)))
+
+#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
+ (__v8di)(__m512i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)(M), (R)))
+
+#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)(M),(R)))
+
+#define _mm_cmp_round_sd_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1,(R)))
+
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (M),(R)))
+
+#define _mm_cmp_round_ss_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1,(R)))
+
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (M),(R)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
+{
+ __m512 v1_old = _mm512_setzero_ps ();
+ __mmask16 mask = 0xFFFF;
+
+ return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
+ __addr,
+ (__v16si) __index,
+ mask, __scale);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
+ __m512i __index, float const *__addr,
+ int __scale)
+{
+ return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
+ __addr,
+ (__v16si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_pd (__m256i __index, double const *__addr,
+ int __scale)
+{
+ __m512d v1_old = _mm512_setzero_pd ();
+ __mmask8 mask = 0xFF;
+
+ return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
+ __addr,
+ (__v8si) __index, mask,
+ __scale);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
+ __m256i __index, double const *__addr,
+ int __scale)
+{
+ return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
+{
+ __m256 v1_old = _mm256_setzero_ps ();
+ __mmask8 mask = 0xFF;
+
+ return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
+ __addr,
+ (__v8di) __index, mask,
+ __scale);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
+ __m512i __index, float const *__addr,
+ int __scale)
+{
+ return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
+ __addr,
+ (__v8di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_pd (__m512i __index, double const *__addr,
+ int __scale)
+{
+ __m512d v1_old = _mm512_setzero_pd ();
+ __mmask8 mask = 0xFF;
+
+ return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
+ __addr,
+ (__v8di) __index, mask,
+ __scale);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
+ __m512i __index, double const *__addr,
+ int __scale)
+{
+ return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
+ __addr,
+ (__v8di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_epi32 (__m512i __index, int const *__addr,
+ int __scale)
+{
+ __m512i v1_old = _mm512_setzero_si512 ();
+ __mmask16 mask = 0xFFFF;
+
+ return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
+ __addr,
+ (__v16si) __index,
+ mask, __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
+ __m512i __index, int const *__addr,
+ int __scale)
+{
+ return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
+ __addr,
+ (__v16si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32gather_epi64 (__m256i __index, long long const *__addr,
+ int __scale)
+{
+ __m512i v1_old = _mm512_setzero_si512 ();
+ __mmask8 mask = 0xFF;
+
+ return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
+ __addr,
+ (__v8si) __index, mask,
+ __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
+ __m256i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_epi32 (__m512i __index, int const *__addr,
+ int __scale)
+{
+ __m256i v1_old = _mm256_setzero_si256 ();
+ __mmask8 mask = 0xFF;
+
+ return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
+ __addr,
+ (__v8di) __index,
+ mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
+ __m512i __index, int const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
+ __addr,
+ (__v8di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64gather_epi64 (__m512i __index, long long const *__addr,
+ int __scale)
+{
+ __m512i v1_old = _mm512_setzero_si512 ();
+ __mmask8 mask = 0xFF;
+
+ return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
+ __addr,
+ (__v8di) __index, mask,
+ __scale);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
+ __m512i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
+ __addr,
+ (__v8di) __index,
+ __mask, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1,
+ int __scale)
+{
+ __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
+ (__v16si) __index, (__v16sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
+ __m512i __index, __m512 __v1, int __scale)
+{
+ __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
+ (__v16sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
+ int __scale)
+{
+ __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+ __m256i __index, __m512d __v1, int __scale)
+{
+ __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
+ (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1,
+ int __scale)
+{
+ __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
+ (__v8di) __index, (__v8sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+ __m512i __index, __m256 __v1, int __scale)
+{
+ __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
+ (__v8sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
+ int __scale)
+{
+ __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
+ (__v8di) __index, (__v8df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+ __m512i __index, __m512d __v1, int __scale)
+{
+ __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
+ (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
+ __m512i __v1, int __scale)
+{
+ __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
+ (__v16si) __index, (__v16si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
+ __m512i __index, __m512i __v1,
+ int __scale)
+{
+ __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
+ (__v16si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
+ __m512i __v1, int __scale)
+{
+ __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m256i __index, __m512i __v1,
+ int __scale)
+{
+ __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
+ (__v8di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
+ __m256i __v1, int __scale)
+{
+ __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
+ (__v8di) __index, (__v8si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m512i __index, __m256i __v1,
+ int __scale)
+{
+ __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
+ (__v8si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
+ __m512i __v1, int __scale)
+{
+ __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
+ (__v8di) __index, (__v8di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m512i __index, __m512i __v1,
+ int __scale)
+{
+ __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
+ (__v8di) __v1, __scale);
+}
+#else
+#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
+ (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \
+ (float const *)(ADDR), \
+ (__v16si)(__m512i)(INDEX), \
+ (__mmask16)0xFFFF, (int)(SCALE))
+
+#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v16si)(__m512i)(INDEX), \
+ (__mmask16)(MASK), (int)(SCALE))
+
+#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
+ (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \
+ (double const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
+ (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \
+ (float const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
+ (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \
+ (double const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \
+ (int const *)(ADDR), \
+ (__v16si)(__m512i)(INDEX), \
+ (__mmask16)0xFFFF, (int)(SCALE))
+
+#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v16si)(__m512i)(INDEX), \
+ (__mmask16)(MASK), (int)(SCALE))
+
+#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (long long const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
+ (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \
+ (int const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (long long const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)0xFF, (int)(SCALE))
+
+#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v8di)(__m512i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE))
+
+#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)0xFFFF, \
+ (__v16si)(__m512i)(INDEX), \
+ (__v16sf)(__m512)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)(MASK), \
+ (__v16si)(__m512i)(INDEX), \
+ (__v16sf)(__m512)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)0xFF, \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)(MASK), \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8df)(__m512d)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)0xFFFF, \
+ (__v16si)(__m512i)(INDEX), \
+ (__v16si)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)(MASK), \
+ (__v16si)(__m512i)(INDEX), \
+ (__v16si)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)0xFF, \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)(MASK), \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8di)(__m512i)(V1), (int)(SCALE))
+
+#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v8di)(__m512i)(INDEX), \
+ (__v8di)(__m512i)(V1), (int)(SCALE))
+#endif
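+
+/* Usage sketch (illustrative; the arrays, index values and names below
+   are hypothetical, and assume the gather and unaligned-load
+   intrinsics defined earlier in this header).  SCALE is the byte
+   multiplier applied to each index and must be a compile-time constant
+   of 1, 2, 4 or 8.  Gathering eight doubles through 64-bit indices and
+   scattering them back:
+
+     double src[64], dst[64];
+     long long off[8] = { 0, 9, 18, 27, 36, 45, 54, 63 };
+     __m512i idx = _mm512_loadu_si512 (off);
+     __m512d v = _mm512_i64gather_pd (idx, src, 8);
+     _mm512_i64scatter_pd (dst, idx, v, 8);
+
+   In the masked forms only elements whose mask bit is set are loaded
+   or stored; masked-off gather elements are taken from V1OLD.  */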
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+ __builtin_ia32_compressstoredf512_mask ((__v8df *)__P, (__v8df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+ __builtin_ia32_compressstoresf512_mask ((__v16sf *)__P, (__v16sf) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+ __builtin_ia32_compressstoredi512_mask ((__v8di *)__P, (__v8di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U,
+ __m512i __A)
+{
+ __builtin_ia32_compressstoresi512_mask ((__v16si *)__P, (__v16si) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
+ (__v8df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
+ (__v16sf) __W,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U,
+ void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
+ (__v8di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloaddi512_maskz ((const __v8di *)__P,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U,
+ void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
+ (__v16si) __W,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *)__P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16)
+ __U);
+}
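+
+/* Usage sketch (illustrative, hypothetical value a): compress packs
+   the elements selected by the mask into the low lanes of the result;
+   expand is the inverse, spreading consecutive low source elements to
+   the lanes selected by the mask.  With mask 0xA5 (bits 0, 2, 5, 7):
+
+     __m512d c = _mm512_maskz_compress_pd (0xA5, a);
+     __m512d e = _mm512_maskz_expand_pd (0xA5, c);
+
+   gives c = { a0, a2, a5, a7, 0, 0, 0, 0 } (lowest lane first) and e
+   with a0, a2, a5 and a7 restored to their original lanes.  The
+   compressstoreu forms write only as many elements to memory as the
+   mask has set bits.  */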
+
+/* Mask arithmetic operations.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kand (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kandn (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kortestz (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kortestc (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kxnor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kxor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_knot (__mmask16 __A)
+{
+ return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
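+
+/* Usage sketch (illustrative, hypothetical masks m1 and m2): these
+   operate on whole 16-bit predicate masks.  Note kandn complements its
+   FIRST operand, matching the KANDNW instruction:
+
+     __mmask16 both = _mm512_kand (m1, m2);
+     __mmask16 only1 = _mm512_kandn (m2, m1);   (computes m1 & ~m2)
+
+   kortestz returns nonzero when (m1 | m2) == 0, kortestc when the OR
+   is all ones; kunpackb concatenates the low bytes of its two
+   operands.  */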
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
+ (__v4si) __D,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __B);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
+ (__v4sf) __D,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __B);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
+ __m128i __D, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
+ (__v4si) __D,
+ __imm,
+ (__v16si) __A,
+ __B);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
+ __m128 __D, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
+ (__v4sf) __D,
+ __imm,
+ (__v16sf) __A, __B);
+}
+#else
+#define _mm512_maskz_insertf32x4(A, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A)))
+
+#define _mm512_maskz_inserti32x4(A, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+ (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
+ (__mmask16)(A)))
+
+#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
+ (__mmask16)(B)))
+
+#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
+ (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
+ (__mmask16)(B)))
+#endif
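+
+/* Usage sketch (illustrative, hypothetical src and part): the
+   immediate selects which 128-bit lane (0-3) of the 512-bit operand is
+   replaced; the 16-bit mask then controls the per-element write-back.
+   Replacing the top lane while keeping every element:
+
+     __m512i r = _mm512_mask_inserti32x4 (src, (__mmask16) -1,
+                                          src, part, 3);
+*/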
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
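+
+/* Usage sketch (illustrative, hypothetical vector v): the mask forms
+   merge masked-off lanes from __W, the maskz forms zero them.
+   Clamping only the even lanes of v to be non-negative:
+
+     __m512i r = _mm512_mask_max_epi32 (v, 0x5555, v,
+                                        _mm512_setzero_si512 ());
+
+   Odd lanes pass through from the merge source v unchanged.  */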
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_ss (__mmask8 __M, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (), __M,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_ss (__m128 __W, __mmask8 __M, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W, __M, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_sd (__mmask8 __M, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (), __M,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_sd (__m128d __W, __mmask8 __M, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W, __M, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_ss (__mmask8 __M, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (), __M,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_ss (__m128 __W, __mmask8 __M, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W, __M, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_sd (__mmask8 __M, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (), __M,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_sd (__m128d __W, __mmask8 __M, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W, __M, __R);
+}
+#else
+#define _mm_maskz_min_round_ss(M, A, B, R) \
+(__m128)__builtin_ia32_minss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (M), (R))
+
+#define _mm_mask_min_round_ss(W, M, A, B, R) \
+(__m128)__builtin_ia32_minss_mask((A), (B), (W), (M), (R))
+
+#define _mm_maskz_min_round_sd(M, A, B, R) \
+(__m128d)__builtin_ia32_minsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (M), (R))
+
+#define _mm_mask_min_round_sd(W, M, A, B, R) \
+(__m128d)__builtin_ia32_minsd_mask((A), (B), (W), (M), (R))
+
+#define _mm_maskz_max_round_ss(M, A, B, R) \
+(__m128)__builtin_ia32_maxss_mask((A), (B), (__v4sf)_mm_setzero_ps(), (M), (R))
+
+#define _mm_mask_max_round_ss(W, M, A, B, R) \
+(__m128)__builtin_ia32_maxss_mask((A), (B), (W), (M), (R))
+
+#define _mm_maskz_max_round_sd(M, A, B, R) \
+(__m128d)__builtin_ia32_maxsd_mask((A), (B), (__v2df)_mm_setzero_pd(), (M), (R))
+
+#define _mm_mask_max_round_sd(W, M, A, B, R) \
+(__m128d)__builtin_ia32_maxsd_mask((A), (B), (W), (M), (R))
+#endif
+
+#define _mm_max_round_ss(A, B, R) \
+ _mm_maskz_max_round_ss((__mmask8)-1, A, B, R)
+
+#define _mm_max_round_sd(A, B, R) \
+ _mm_maskz_max_round_sd((__mmask8)-1, A, B, R)
+
+#define _mm_min_round_ss(A, B, R) \
+ _mm_maskz_min_round_ss((__mmask8)-1, A, B, R)
+
+#define _mm_min_round_sd(A, B, R) \
+ _mm_maskz_min_round_sd((__mmask8)-1, A, B, R)
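+
+/* Usage sketch (illustrative, hypothetical a and b): the _round forms
+   take an explicit exception/rounding control instead of using MXCSR.
+   min/max do not round, so the useful control here is SAE:
+
+     __m128d r = _mm_max_round_sd (a, b, _MM_FROUND_NO_EXC);
+
+   evaluates the low-lane maximum with floating-point exceptions
+   suppressed; the upper lane is copied from a.  */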
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
+{
+ return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
+{
+ return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
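+
+/* Usage sketch (illustrative, hypothetical a and b): blend selects,
+   per element, from the second vector where the mask bit is set and
+   from the first where it is clear:
+
+     __m512d r = _mm512_mask_blend_pd (0x0F, a, b);
+
+   yields b in lanes 0-3 and a in lanes 4-7.  */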
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sd (__m128d __A, __m128d __B, __m128d __W,
+ __mmask8 __U, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_ss (__m128 __A, __m128 __B, __m128 __W,
+ __mmask8 __U, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sd (__m128d __A, __m128d __B, __m128d __W,
+ __mmask8 __U, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_ss (__m128 __A, __m128 __B, __m128 __W,
+ __mmask8 __U, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ __mmask8 __U, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B,
+ __mmask8 __U, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B,
+ __mmask8 __U, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B,
+ __mmask8 __U, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), (B), (C), -1, (R))
+
+#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), (B), (C), (U), (R))
+
+#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((A), (B), (C), (U), (R))
+
+#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((A), (B), (C), (U), (R))
+
+#define _mm_fmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), (B), (C), -1, (R))
+
+#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), (B), (C), (U), (R))
+
+#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask3((A), (B), (C), (U), (R))
+
+#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((A), (B), (C), (U), (R))
+
+#define _mm_fmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), (B), -(C), -1, (R))
+
+#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), (B), -(C), (U), (R))
+
+#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
+ (__m128d)__builtin_ia32_vfmsubsd3_mask3((A), (B), (C), (U), (R))
+
+#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((A), (B), -(C), (U), (R))
+
+#define _mm_fmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), (B), -(C), -1, (R))
+
+#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), (B), -(C), (U), (R))
+
+#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
+ (__m128)__builtin_ia32_vfmsubss3_mask3((A), (B), (C), (U), (R))
+
+#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((A), (B), -(C), (U), (R))
+
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), -(B), (C), -1, (R))
+
+#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), -(B), (C), (U), (R))
+
+#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((A), -(B), (C), (U), (R))
+
+#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((A), -(B), (C), (U), (R))
+
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), -(B), (C), -1, (R))
+
+#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), -(B), (C), (U), (R))
+
+#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask3((A), -(B), (C), (U), (R))
+
+#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((A), -(B), (C), (U), (R))
+
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), -(B), -(C), -1, (R))
+
+#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((A), -(B), -(C), (U), (R))
+
+#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
+ (__m128d)__builtin_ia32_vfmsubsd3_mask3((A), -(B), (C), (U), (R))
+
+#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((A), -(B), -(C), (U), (R))
+
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), -(B), -(C), -1, (R))
+
+#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_mask((A), -(B), -(C), (U), (R))
+
+#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
+ (__m128)__builtin_ia32_vfmsubss3_mask3((A), -(B), (C), (U), (R))
+
+#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_maskz((A), -(B), -(C), (U), (R))
+#endif
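+
+/* Sign conventions of the scalar FMA forms above, writing the low
+   lanes of the three operands as w, a and b:
+
+     fmadd:   w * a + b        fmsub:   w * a - b
+     fnmadd: -(w * a) + b      fnmsub: -(w * a) - b
+
+   In the unmasked forms the upper lane of the result is copied from
+   the first operand.  */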
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P,
+ const int __R)
+{
+ return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P,
+ const int __R)
+{
+ return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
+}
+#else
+#define _mm_comi_round_ss(A, B, C, D) \
+__builtin_ia32_vcomiss((A), (B), (C), (D))
+#define _mm_comi_round_sd(A, B, C, D) \
+__builtin_ia32_vcomisd((A), (B), (C), (D))
+#endif
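+
+/* Usage sketch (illustrative, hypothetical a and b; assumes the AVX
+   _CMP_* predicate encodings defined elsewhere in <immintrin.h>):
+
+     int lt = _mm_comi_round_sd (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
+
+   returns nonzero when the low double of a is less than that of b,
+   with exceptions suppressed (SAE).  */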
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
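+
+/* Usage sketch (illustrative): the masked scalar forms compute the
+   operation in the low lane only when bit 0 of the mask is set;
+   otherwise the low lane comes from __W (or is zeroed in the maskz
+   forms).  The upper lane is always copied from __A, e.g. for
+   hypothetical w, m, a and b:
+
+     __m128d r = _mm_mask_add_sd (w, m, a, b);
+
+   gives r0 = (m & 1) ? a0 + b0 : w0 and r1 = a1.  */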
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
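+
+/* Usage sketch (illustrative, hypothetical a and b): note the scalar
+   sqrt forms take the value to be rooted in __B and copy the upper
+   lane from __A:
+
+     __m128d r = _mm_maskz_sqrt_sd ((__mmask8) 1, a, b);
+
+   gives r0 = sqrt (b0) and r1 = a1.  */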
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_pd (__m512d __M, __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M,
+ __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
+{
+ return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
+ (__v8df) __V,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
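+/* VSCALEF: scale by a power of two, computing
+   __A[i] * 2^floor(__B[i]) for each element.  */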
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
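+/* Fused multiply-add, __A * __B + __C.  The three masked forms differ
+   in which operand supplies the result elements whose mask bit is
+   clear: mask keeps __A, mask3 keeps __C, and maskz zeroes them.  */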
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
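+/* Fused multiply-subtract, __A * __B - __C, expressed by negating __C
+   and reusing the fmadd builtins.  A dedicated vfmsub builtin is only
+   needed for the mask3 form, where __C must survive masking
+   unnegated.  */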
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
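+/* Fused multiply with alternating operations: even-indexed elements
+   compute __A * __B - __C, odd-indexed elements __A * __B + __C.  */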
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
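+/* The subadd variant flips the pattern (even elements add, odd
+   elements subtract) and is expressed through the fmaddsub builtins
+   with __C negated, except for mask3, which needs vfmsubadd so that
+   __C is preserved unnegated in the masked-off elements.  */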
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
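+/* Fused negated multiply-add, -(__A * __B) + __C, obtained by negating
+   __A.  The mask form uses a dedicated vfnmadd builtin so that __A is
+   preserved unnegated in the masked-off elements.  */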
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
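+/* Fused negated multiply-subtract, -(__A * __B) - __C.  As above, the
+   mask and mask3 forms use dedicated vfnmsub builtins so that the
+   preserved operand (__A and __C respectively) is not negated.  */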
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C,
+ __mmask8 __U)
+{
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512d __C)
+{
+ return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+ (__v8df) __B,
+ -(__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C,
+ __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512 __C)
+{
+ return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+ (__v16sf) __B,
+ -(__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
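+/* Float to integer conversions.  The cvtt forms truncate toward zero;
+   the cvt forms round according to _MM_FROUND_CUR_DIRECTION, i.e. the
+   current MXCSR rounding mode.  Eight doubles narrow to eight 32-bit
+   integers, so the pd variants return a 256-bit vector.  */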
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epi32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epi32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epi32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epu32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epi32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epu32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
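+/* Scalar unsigned-integer to float conversions.  The 64-bit forms
+   need the REX.W encoding and so are only available on x86-64.  */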
+#ifdef __x86_64__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_ss (__m128 __A, unsigned __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
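+/* VFIXUPIMM takes an 8-bit immediate, which the builtins require to
+   be a compile-time constant; without optimization the inline
+   wrappers cannot guarantee that, so macro forms are provided
+   instead.  */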
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ __m512i __C, const int __imm)
+{
+ return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16si) __C,
+ __imm,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#else
+#define _mm512_fixupimm_pd(X, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_fixupimm_ps(X, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
+ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_fixupimm_sd(X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_fixupimm_ss(X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#endif
+
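+/* Scalar float to signed and unsigned integer conversions; the cvt
+   forms honor the current rounding mode, the cvtt forms truncate.  */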
+#ifdef __x86_64__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
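+/* Widening conversions: eight singles to eight doubles, and sixteen
+   half-precision values to sixteen singles.  */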
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_pd (__m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_ps (__m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_ps (__m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
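+/* Masked conversion of the low element between double and single
+   precision; the upper elements are copied from __A.  */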
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
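+/* VGETEXP returns each element's exponent as a floating-point value,
+   i.e. floor(log2(|x|)) for normal inputs.  */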
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
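+/* VGETMANT extracts the normalized mantissa.  The interval selector
+   __B occupies the low two immediate bits and the sign control __C
+   the next two, hence the (__C << 2) | __B encoding.  */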
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
+ (__C << 2) | __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
+ (__C << 2) | __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#else
+#define _mm512_getmant_pd(X, B, C) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)-1,\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_pd(W, U, X, B, C) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getmant_pd(U, X, B, C) \
+ ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm512_getmant_ps(X, B, C) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)-1,\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_ps(W, U, X, B, C) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getmant_ps(U, X, B, C) \
+ ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__mmask16)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_getmant_sd(X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)-1,\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_getmant_ss(X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1,\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getexp_ss(A, B) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getexp_ss(W, U, A, B) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getexp_ss(U, A, B) \
+ ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getexp_sd(A, B) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)_mm_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getexp_sd(W, U, A, B) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getexp_sd(U, A, B) \
+ ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_getexp_ps(A) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getexp_ps(W, U, A) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getexp_ps(U, A) \
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_getexp_pd(A) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getexp_pd(W, U, A) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getexp_pd(U, A) \
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#endif
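
The getmant macros above pack two immediates into a single operand — the normalization interval B in bits 1:0 and the sign control C in bits 3:2, hence the ((C)<<2)|(B) expression. A minimal usage sketch, assuming the _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_src enumerators defined earlier in this header family:

#include <immintrin.h>

/* Split x into a mantissa in [1,2) and an exponent such that
   x == mant * 2^exp for normal inputs.  */
__m512 split_float (__m512 x, __m512 *exp)
{
  *exp = _mm512_getexp_ps (x);
  return _mm512_getmant_ps (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}
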
+
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_ps (__m512 __A, const int __imm)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
+ (__v16sf) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
+ (__v16sf) __A,
+ (__mmask16) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_pd (__m512d __A, const int __imm)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
+ (__v8df) __A, -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
+ (__v8df) __A,
+ (__mmask8) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ss (__mmask8 __U, __m128 __A,
+ __m128 __B, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaless_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_sd (__mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalesd_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+#define _mm512_roundscale_ps(A, B) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
+ (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_roundscale_ps(A, B, C, D) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
+ (int)(D), \
+ (__v16sf)(__m512)(A), \
+ (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_roundscale_ps(A, B, C) \
+ ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
+ (int)(C), \
+ (__v16sf)_mm512_setzero_ps(),\
+ (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_roundscale_pd(A, B) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
+ (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_roundscale_pd(A, B, C, D) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
+ (int)(D), \
+ (__v8df)(__m512d)(A), \
+ (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_roundscale_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
+ (int)(C), \
+ (__v8df)_mm512_setzero_pd(),\
+ (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_ss(A, B, C) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_ss(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W),(__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_ss(U, A, B, C) \
+ ((__m128) __builtin_ia32_rndscaless_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_sd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_sd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_rndscalesd_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#endif
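
In the VRNDSCALE immediate, bits 7:4 select the scale M (the result is rounded to a multiple of 2^-M) and bits 1:0 the rounding mode, so plain rounding to integers uses M = 0. A sketch, assuming _MM_FROUND_TO_NEAREST_INT from smmintrin.h:

#include <immintrin.h>

/* Keep four fraction bits: round each lane to the nearest
   multiple of 1/16 (imm8 = (M << 4) | rounding mode).  */
__m512 round_to_sixteenths (__m512 x)
{
  return _mm512_roundscale_ps (x, (4 << 4) | _MM_FROUND_TO_NEAREST_INT);
}
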
+
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, __P,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, __P,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, __P,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, __P,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+#define _mm512_cmp_pd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_cmp_ps_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)(Y), (int)(P),\
+ (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)(Y), (int)(P),\
+ (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_cmp_sd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (M),_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_cmp_ss_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (M),_MM_FROUND_CUR_DIRECTION))
+#endif
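
The cmp intrinsics return their result as a k-register mask rather than a vector, so the predicate outcome can feed any masked operation or be counted directly. A sketch using the AVX _CMP_LT_OS predicate encoding, which these intrinsics share:

#include <immintrin.h>

/* Number of lanes of a that compare ordered-less-than b.  */
int lanes_below (__m512 a, __m512 b)
{
  __mmask16 m = _mm512_cmp_ps_mask (a, b, _CMP_LT_OS);
  return __builtin_popcount ((unsigned int) m);
}
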
+
+extern __inline __m128 __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_ss (__mmask8 __M, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (), __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_ss (__m128 __W, __mmask8 __M, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_sd (__mmask8 __M, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (), __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_sd (__m128d __W, __mmask8 __M, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_ss (__mmask8 __M, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (), __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128 __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_ss (__m128 __W, __mmask8 __M, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_sd (__mmask8 __M, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (), __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d __attribute__
+ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_sd (__m128d __W, __mmask8 __M, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sd (__m128d __A, __m128d __B, __m128d __W,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ss (__m128 __A, __m128 __B, __m128 __W, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sd (__m128d __A, __m128d __B, __m128d __W,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ss (__m128 __A, __m128 __B, __m128 __W, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
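
These scalar FMA variants use merge masking on lane 0 only: when bit 0 of the mask is clear, the first operand passes through unchanged, and the upper element is always taken from it. A sketch of that semantics:

#include <immintrin.h>

/* Lane 0 becomes w*a + b when cond is nonzero, else stays w;
   lane 1 is copied from w either way.  */
__m128d fmadd_if (__m128d w, __m128d a, __m128d b, int cond)
{
  return _mm_mask_fmadd_sd (w, (__mmask8) (cond != 0), a, b);
}
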
+
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512F__ */
+
+#endif /* _AVX512FINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
new file mode 100644
index 00000000000..c59d2b40d40
--- /dev/null
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -0,0 +1,130 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512PFINTRIN_H_INCLUDED
+#define _AVX512PFINTRIN_H_INCLUDED
+
+#ifndef __AVX512PF__
+#pragma GCC push_options
+#pragma GCC target("avx512pf")
+#define __DISABLE_AVX512PF__
+#endif /* __AVX512PF__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+#ifdef __OPTIMIZE__
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
+ int const *addr, int scale, int hint)
+{
+ __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
+ int const *addr, int scale, int hint)
+{
+ __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale,
+ int hint)
+{
+ __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr, scale,
+ hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32scatter_ps (int const *addr, __mmask16 mask,
+ __m512i index, int scale, int hint)
+{
+ __builtin_ia32_scatterpfdps (mask, (__v16si) index, addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64scatter_ps (int const *addr, __m512i index, int scale,
+ int hint)
+{
+ __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, addr, scale,
+ hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64scatter_ps (int const *addr, __mmask8 mask,
+ __m512i index, int scale, int hint)
+{
+ __builtin_ia32_scatterpfqps (mask, (__v8di) index, addr, scale, hint);
+}
+#else
+#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
+ __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
+ __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
+ (int const *)ADDR, (int)SCALE, (int)HINT)
+#endif
+
+#ifdef __DISABLE_AVX512PF__
+#undef __DISABLE_AVX512PF__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512PF__ */
+
+#endif /* _AVX512PFINTRIN_H_INCLUDED */
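
A usage sketch for the gather-prefetch side, assuming _MM_HINT_T0 from xmmintrin.h is one of the hint values the builtin accepts:

#include <immintrin.h>

/* Warm the cache for a later 32-bit-index gather over table;
   only lanes selected by live are prefetched.  */
void warm_gather (int const *table, __m512i idx, __mmask16 live)
{
  _mm512_mask_prefetch_i32gather_ps (idx, live, table, 4, _MM_HINT_T0);
}
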
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 6cb53b8aafb..92e0c053fac 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -19,7 +19,7 @@
;;; Unused letters:
;;; B H T
-;;; h jk v
+;;; h j
;; Integer register constraints.
;; It is not necessary to define 'r' here.
@@ -78,6 +78,12 @@
"TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
"Second from top of 80387 floating-point stack (@code{%st(1)}).")
+(define_register_constraint "k" "TARGET_AVX512F ? MASK_EVEX_REGS : NO_REGS"
+"@internal Any mask register that can be used as predicate, i.e. k1-k7.")
+
+(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS"
+"@internal Any mask register.")
+
;; Vector registers (also used for plain floating point nowadays).
(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
"Any MMX register.")
@@ -101,11 +107,11 @@
"First SSE register (@code{%xmm0}).")
(define_register_constraint "Yi"
- "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? SSE_REGS : NO_REGS"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? ALL_SSE_REGS : NO_REGS"
"@internal Any SSE register, when SSE2 and inter-unit moves to vector registers are enabled.")
(define_register_constraint "Yj"
- "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? SSE_REGS : NO_REGS"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? ALL_SSE_REGS : NO_REGS"
"@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.")
(define_register_constraint "Ym"
@@ -138,6 +144,9 @@
"(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS"
"@internal Any x87 register when 80387 FP arithmetic is enabled.")
+(define_register_constraint "v" "TARGET_SSE ? ALL_SSE_REGS : NO_REGS"
+ "Any EVEX encodable SSE register (@code{%xmm0-%xmm31}).")
+
(define_constraint "z"
"@internal Constant call address operand."
(match_operand 0 "constant_call_address_operand"))
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index c1e1eba12f1..de1a463d6b7 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -71,8 +71,13 @@
#define bit_AVX2 (1 << 5)
#define bit_BMI2 (1 << 8)
#define bit_RTM (1 << 11)
+#define bit_AVX512F (1 << 16)
#define bit_RDSEED (1 << 18)
#define bit_ADX (1 << 19)
+#define bit_AVX512PF (1 << 26)
+#define bit_AVX512ER (1 << 27)
+#define bit_AVX512CD (1 << 28)
+#define bit_SHA (1 << 29)
/* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
#define bit_XSAVEOPT (1 << 0)
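
The new bits live in the %ebx output of leaf 7, subleaf 0. A detection sketch built on this header; note that a complete runtime check would also confirm OS support for the ZMM state via OSXSAVE/XGETBV:

#include <cpuid.h>

int have_avx512f (void)
{
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid_max (0, 0) < 7)
    return 0;
  __cpuid_count (7, 0, eax, ebx, ecx, edx);
  return (ebx & bit_AVX512F) != 0;
}
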
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index c8b71c8edf9..9076bee24a0 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -390,6 +390,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
+ unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
+ unsigned int has_avx512f = 0, has_sha = 0;
bool arch;
@@ -461,6 +463,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_fsgsbase = ebx & bit_FSGSBASE;
has_rdseed = ebx & bit_RDSEED;
has_adx = ebx & bit_ADX;
+ has_avx512f = ebx & bit_AVX512F;
+ has_avx512er = ebx & bit_AVX512ER;
+ has_avx512pf = ebx & bit_AVX512PF;
+ has_avx512cd = ebx & bit_AVX512CD;
+ has_sha = ebx & bit_SHA;
}
if (max_level >= 13)
@@ -802,6 +809,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
const char *aes = has_aes ? " -maes" : " -mno-aes";
+ const char *sha = has_sha ? " -msha" : " -mno-sha";
const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
const char *abm = has_abm ? " -mabm" : " -mno-abm";
@@ -828,13 +836,18 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
+ const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
+ const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
+ const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
+ const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
- sse4a, cx16, sahf, movbe, aes, pclmul,
+ sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
- fxsr, xsave, xsaveopt, NULL);
+ fxsr, xsave, xsaveopt, avx512f, avx512er,
+ avx512cd, avx512pf, NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 314f3e888d8..73da45c203e 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -99,6 +99,15 @@ DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V32QI, QI)
DEF_VECTOR_TYPE (V4UDI, UDI, V4DI)
DEF_VECTOR_TYPE (V8USI, USI, V8SI)
+DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
+
+# AVX512F vectors
+DEF_VECTOR_TYPE (V32SF, FLOAT)
+DEF_VECTOR_TYPE (V16SF, FLOAT)
+DEF_VECTOR_TYPE (V8DF, DOUBLE)
+DEF_VECTOR_TYPE (V8DI, DI)
+DEF_VECTOR_TYPE (V16SI, SI)
+DEF_VECTOR_TYPE (V64QI, QI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
@@ -123,21 +132,29 @@ DEF_POINTER_TYPE (PV2SF, V2SF)
DEF_POINTER_TYPE (PV4DF, V4DF)
DEF_POINTER_TYPE (PV4DI, V4DI)
DEF_POINTER_TYPE (PV4SF, V4SF)
+DEF_POINTER_TYPE (PV8DF, V8DF)
DEF_POINTER_TYPE (PV8SF, V8SF)
DEF_POINTER_TYPE (PV4SI, V4SI)
DEF_POINTER_TYPE (PV8SI, V8SI)
+DEF_POINTER_TYPE (PV8DI, V8DI)
+DEF_POINTER_TYPE (PV16SI, V16SI)
+DEF_POINTER_TYPE (PV16SF, V16SF)
DEF_POINTER_TYPE (PCV2SI, V2SI, CONST)
DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
DEF_POINTER_TYPE (PCV2SF, V2SF, CONST)
DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
DEF_POINTER_TYPE (PCV4SF, V4SF, CONST)
+DEF_POINTER_TYPE (PCV8DF, V8DF, CONST)
DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
+DEF_POINTER_TYPE (PCV16SF, V16SF, CONST)
DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
DEF_POINTER_TYPE (PCV4SI, V4SI, CONST)
DEF_POINTER_TYPE (PCV4DI, V4DI, CONST)
DEF_POINTER_TYPE (PCV8SI, V8SI, CONST)
+DEF_POINTER_TYPE (PCV8DI, V8DI, CONST)
+DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
@@ -165,6 +182,7 @@ DEF_FUNCTION_TYPE (UINT16, UINT16)
DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
DEF_FUNCTION_TYPE (V16QI, PCCHAR)
DEF_FUNCTION_TYPE (V16QI, V16QI)
+DEF_FUNCTION_TYPE (V16QI, V16SI)
DEF_FUNCTION_TYPE (V2DF, PCDOUBLE)
DEF_FUNCTION_TYPE (V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2SI)
@@ -190,6 +208,8 @@ DEF_FUNCTION_TYPE (V4DF, V2DF)
DEF_FUNCTION_TYPE (V4DF, V4DF)
DEF_FUNCTION_TYPE (V4DF, V4SF)
DEF_FUNCTION_TYPE (V4DF, V4SI)
+DEF_FUNCTION_TYPE (V8DF, V8SI)
+DEF_FUNCTION_TYPE (V8DF, V8DF)
DEF_FUNCTION_TYPE (V4HI, V4HI)
DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
DEF_FUNCTION_TYPE (V4SF, V2DF)
@@ -207,6 +227,7 @@ DEF_FUNCTION_TYPE (V4SI, V4SI)
DEF_FUNCTION_TYPE (V4SI, V8HI)
DEF_FUNCTION_TYPE (V4SI, V8SI)
DEF_FUNCTION_TYPE (V8HI, V16QI)
+DEF_FUNCTION_TYPE (V8HI, V8DI)
DEF_FUNCTION_TYPE (V8HI, V8HI)
DEF_FUNCTION_TYPE (V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
@@ -216,10 +237,15 @@ DEF_FUNCTION_TYPE (V8SF, V4SF)
DEF_FUNCTION_TYPE (V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF)
+DEF_FUNCTION_TYPE (V8SI, V8DI)
DEF_FUNCTION_TYPE (V8SI, V4SI)
+DEF_FUNCTION_TYPE (V8SF, V8DF)
+DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI)
DEF_FUNCTION_TYPE (V8SI, V8SF)
DEF_FUNCTION_TYPE (V32QI, V32QI)
DEF_FUNCTION_TYPE (V32QI, V16QI)
+DEF_FUNCTION_TYPE (V16HI, V16SI)
DEF_FUNCTION_TYPE (V16HI, V16HI)
DEF_FUNCTION_TYPE (V16HI, V8HI)
DEF_FUNCTION_TYPE (V8SI, V8SI)
@@ -238,6 +264,28 @@ DEF_FUNCTION_TYPE (V4DI, V8HI)
DEF_FUNCTION_TYPE (V4DI, V4SI)
DEF_FUNCTION_TYPE (V4DI, PV4DI)
DEF_FUNCTION_TYPE (V4DI, V2DI)
+DEF_FUNCTION_TYPE (V16SF, FLOAT)
+DEF_FUNCTION_TYPE (V16SI, INT)
+DEF_FUNCTION_TYPE (V8DF, DOUBLE)
+DEF_FUNCTION_TYPE (V8DI, INT64)
+DEF_FUNCTION_TYPE (V16SF, V4SF)
+DEF_FUNCTION_TYPE (V8DF, V4DF)
+DEF_FUNCTION_TYPE (V8DI, V4DI)
+DEF_FUNCTION_TYPE (V16QI, V8DI)
+DEF_FUNCTION_TYPE (UINT, V4SF)
+DEF_FUNCTION_TYPE (UINT64, V4SF)
+DEF_FUNCTION_TYPE (UINT, V2DF)
+DEF_FUNCTION_TYPE (UINT64, V2DF)
+DEF_FUNCTION_TYPE (V16SI, V16SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V8DI, V8DI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V16SI, PV4SI)
+DEF_FUNCTION_TYPE (V16SF, PV4SF)
+DEF_FUNCTION_TYPE (V8DI, PV4DI)
+DEF_FUNCTION_TYPE (V8DF, PV4DF)
+DEF_FUNCTION_TYPE (V8UHI, V8UHI)
+DEF_FUNCTION_TYPE (V8USI, V8USI)
DEF_FUNCTION_TYPE (DI, V2DI, INT)
DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT)
@@ -269,6 +317,8 @@ DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI)
DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI)
DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, UINT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, UINT64)
DEF_FUNCTION_TYPE (V2DF, V2DF, DI)
DEF_FUNCTION_TYPE (V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE)
@@ -294,16 +344,23 @@ DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI)
DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)
DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V8DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V8DF, INT, V4DF, QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI)
DEF_FUNCTION_TYPE (V4HI, V2SI, V2SI)
DEF_FUNCTION_TYPE (V4HI, V4HI, INT)
DEF_FUNCTION_TYPE (V4HI, V4HI, SI)
DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, UINT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, UINT64)
DEF_FUNCTION_TYPE (V4SF, V4SF, DI)
DEF_FUNCTION_TYPE (V4SF, V4SF, INT)
+DEF_FUNCTION_TYPE (INT, V4SF, V4SF, INT, INT)
+DEF_FUNCTION_TYPE (INT, V2DF, V2DF, INT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, SI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF)
@@ -330,30 +387,75 @@ DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V16SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V16SF, INT, V4SF, QI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI)
DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI)
DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V4DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT, V16SF, HI)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
+DEF_FUNCTION_TYPE (V16HI, V16SF, INT)
+DEF_FUNCTION_TYPE (V16HI, V16SF, INT, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT, V16HI, HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, SI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, INT)
DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V16SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V16SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
DEF_FUNCTION_TYPE (V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, SI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI)
+DEF_FUNCTION_TYPE (V16SI, V8DF, V8DF)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
+DEF_FUNCTION_TYPE (V4DI, V8DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V8DI, INT, V4DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V2DI, V4DI, INT)
DEF_FUNCTION_TYPE (VOID, PVOID, INT64)
@@ -361,8 +463,10 @@ DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V16SF)
DEF_FUNCTION_TYPE (VOID, PINT, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, LONGLONG)
DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG)
@@ -373,6 +477,34 @@ DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI)
DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF)
DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF)
DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
+DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
+
+# Instructions returning mask
+DEF_FUNCTION_TYPE (HI, HI)
+DEF_FUNCTION_TYPE (HI, HI, HI)
+DEF_FUNCTION_TYPE (HI, HI, INT)
+DEF_FUNCTION_TYPE (QI, V8DI, V8DI)
+DEF_FUNCTION_TYPE (QI, V8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (HI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (HI, V16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (QI, V8DI, V8DI, INT)
+DEF_FUNCTION_TYPE (QI, V8DI, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (HI, V16SI, V16SI, INT)
+DEF_FUNCTION_TYPE (HI, V16SI, V16SI, INT, HI)
+DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT)
+DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V8DF, V8DF, INT, QI, INT)
+DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT)
+DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT, HI)
+DEF_FUNCTION_TYPE (HI, V16SF, V16SF, INT, HI, INT)
+DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT)
+DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DF, V2DF, INT, QI, INT)
+DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT)
+DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI, INT)
+DEF_FUNCTION_TYPE (V16SI, HI)
+DEF_FUNCTION_TYPE (V8DI, QI)
DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT)
@@ -412,11 +544,69 @@ DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT)
+
+# Instructions with masking
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8SI, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DI, V8SI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8HI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V16QI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF)
+DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF, HI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V16SF, V4SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V8DF, V4DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V2DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16SI, V4SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, V16HI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, V16QI, V16SI, HI)
+DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI)
+DEF_FUNCTION_TYPE (V8DI, V4DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V2DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI)
+DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, QI)
+DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (VOID, PV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, QI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, QI)
+DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT, HI)
+
DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF)
DEF_FUNCTION_TYPE (VOID, PV4DF, V4DI, V4DF)
@@ -438,6 +628,13 @@ DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI)
DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
@@ -450,6 +647,43 @@ DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI)
DEF_FUNCTION_TYPE (UCHAR, UCHAR, UINT, UINT, PUNSIGNED)
DEF_FUNCTION_TYPE (UCHAR, UCHAR, ULONGLONG, ULONGLONG, PULONGLONG)
+# Instructions with rounding
+DEF_FUNCTION_TYPE (UINT64, V2DF, INT)
+DEF_FUNCTION_TYPE (UINT64, V4SF, INT)
+DEF_FUNCTION_TYPE (UINT, V2DF, INT)
+DEF_FUNCTION_TYPE (UINT, V4SF, INT)
+DEF_FUNCTION_TYPE (INT64, V2DF, INT)
+DEF_FUNCTION_TYPE (INT64, V4SF, INT)
+DEF_FUNCTION_TYPE (INT, V2DF, INT)
+DEF_FUNCTION_TYPE (INT, V4SF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, UINT64, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, UINT64, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, UINT, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, INT64, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, INT64, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, INT, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16HI, V16SF, HI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT)
+
+DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, SI, V8DI, V8DI)
+
DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, V2DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, V4DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V8SI, V4DF, INT)
@@ -471,6 +705,30 @@ DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, V4SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V4DI, V8SI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, PCFLOAT, V16SI, HI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, PCFLOAT, V8DI, HI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V16SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, PCDOUBLE, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SI, PCINT, V16SI, HI, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SI, PCINT, V8DI, HI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V16SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V16SI, V16SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8DI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8DI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, HI, V16SI, V16SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
+
+DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT)
+DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
+
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND)
@@ -478,6 +736,7 @@ DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V2DF_V2DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V4DF_V4DF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V8DF_V8DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SF, ROUND)
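
Each DEF_FUNCTION_TYPE row lists the return type first, then the argument types; the masked variants simply append a merge source and a mask type (QI for 8-bit masks, HI for 16-bit masks). Purely as an illustration, a row such as (V8DF, V8DF, V8DF, QI) corresponds to a builtin shaped like the following (the name here is invented):

/* Hypothetical prototype matching V8DF_FTYPE_V8DF_V8DF_QI:
   source vector, merge source, 8-bit write mask.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
extern __v8df __builtin_ia32_example_mask (__v8df, __v8df, unsigned char);
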
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 31dd28a94cb..dfc7d10c0f2 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -300,12 +300,22 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SSE4_2__");
if (isa_flag & OPTION_MASK_ISA_AES)
def_or_undef (parse_in, "__AES__");
+ if (isa_flag & OPTION_MASK_ISA_SHA)
+ def_or_undef (parse_in, "__SHA__");
if (isa_flag & OPTION_MASK_ISA_PCLMUL)
def_or_undef (parse_in, "__PCLMUL__");
if (isa_flag & OPTION_MASK_ISA_AVX)
def_or_undef (parse_in, "__AVX__");
if (isa_flag & OPTION_MASK_ISA_AVX2)
def_or_undef (parse_in, "__AVX2__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512F)
+ def_or_undef (parse_in, "__AVX512F__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512ER)
+ def_or_undef (parse_in, "__AVX512ER__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512CD)
+ def_or_undef (parse_in, "__AVX512CD__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512PF)
+ def_or_undef (parse_in, "__AVX512PF__");
if (isa_flag & OPTION_MASK_ISA_FMA)
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_RTM)
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 393cd4a23be..e0b8fc826ab 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -76,16 +76,19 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */
+VECTOR_MODES (INT, 128); /* V128QI V64HI V32SI V16DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */
+VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF */
VECTOR_MODE (INT, TI, 1); /* V1TI */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
INT_MODE (OI, 32);
+INT_MODE (XI, 64);
/* The symbol Pmode stands for one of the above machine modes (usually SImode).
The tm.h file specifies which one. It is not a distinct mode. */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c546af00b2..73854fc9beb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2188,6 +2188,14 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* SSE REX registers */
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
+ /* New 16 SSE registers */
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ /* Mask registers. */
+ MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
+ MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
};
/* The "default" register map used in 32bit mode. */
@@ -2201,6 +2209,9 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31 */
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* The "default" register map used in 64bit mode. */
@@ -2214,6 +2225,9 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
8,9,10,11,12,13,14,15, /* extended integer registers */
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+ 67, 68, 69, 70, 71, 72, 73, 74, /* new SSE registers 16-23 */
+ 75, 76, 77, 78, 79, 80, 81, 82, /* new SSE registers 24-31 */
+ 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
@@ -2279,6 +2293,9 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31 */
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* Define parameter passing and return registers. */
@@ -2439,7 +2456,7 @@ enum x86_64_reg_class
X86_64_MEMORY_CLASS
};
-#define MAX_CLASSES 4
+#define MAX_CLASSES 8
/* Table of constants used by fldpi, fldln2, etc.... */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
@@ -2578,7 +2595,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
static bool
gate_insert_vzeroupper (void)
{
- return TARGET_AVX && TARGET_VZEROUPPER;
+ return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
}
static unsigned int
@@ -2651,6 +2668,10 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mfma", OPTION_MASK_ISA_FMA },
{ "-mxop", OPTION_MASK_ISA_XOP },
{ "-mlwp", OPTION_MASK_ISA_LWP },
+ { "-mavx512f", OPTION_MASK_ISA_AVX512F },
+ { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
+ { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
+ { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -2675,6 +2696,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mcrc32", OPTION_MASK_ISA_CRC32 },
{ "-maes", OPTION_MASK_ISA_AES },
+ { "-msha", OPTION_MASK_ISA_SHA },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
{ "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
{ "-mrdrnd", OPTION_MASK_ISA_RDRND },
@@ -2955,6 +2977,11 @@ ix86_option_override_internal (bool main_args_p)
#define PTA_FXSR (HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
+#define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
+#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
+#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
+#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
+#define PTA_SHA (HOST_WIDE_INT_1 << 44)
/* if this reaches 64, need to widen struct pta flags below */
@@ -3440,6 +3467,9 @@ ix86_option_override_internal (bool main_args_p)
if (processor_alias_table[i].flags & PTA_AES
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
ix86_isa_flags |= OPTION_MASK_ISA_AES;
+ if (processor_alias_table[i].flags & PTA_SHA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
+ ix86_isa_flags |= OPTION_MASK_ISA_SHA;
if (processor_alias_table[i].flags & PTA_PCLMUL
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
@@ -3476,6 +3506,18 @@ ix86_option_override_internal (bool main_args_p)
if (processor_alias_table[i].flags & PTA_XSAVEOPT
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
+ if (processor_alias_table[i].flags & PTA_AVX512F
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
+ if (processor_alias_table[i].flags & PTA_AVX512ER
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
+ if (processor_alias_table[i].flags & PTA_AVX512PF
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
+ if (processor_alias_table[i].flags & PTA_AVX512CD
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@@ -3940,22 +3982,22 @@ ix86_option_override_internal (bool main_args_p)
TARGET_AVX with -fexpensive-optimizations and split 32-byte
AVX unaligned load/store. */
if (!optimize_size)
- {
- if (flag_expensive_optimizations
- && !(target_flags_explicit & MASK_VZEROUPPER))
+ {
+ if (flag_expensive_optimizations
+ && !(target_flags_explicit & MASK_VZEROUPPER))
target_flags |= MASK_VZEROUPPER;
- if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
- && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
- && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
- /* Enable 128-bit AVX instruction generation
- for the auto-vectorizer. */
- if (TARGET_AVX128_OPTIMAL
- && !(target_flags_explicit & MASK_PREFER_AVX128))
+ /* Enable 128-bit AVX instruction generation
+ for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL
+ && !(target_flags_explicit & MASK_PREFER_AVX128))
target_flags |= MASK_PREFER_AVX128;
- }
+ }
if (ix86_recip_name)
{
@@ -4060,6 +4102,8 @@ ix86_conditional_register_usage (void)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* See the definition of CALL_USED_REGISTERS in i386.h. */
@@ -4100,6 +4144,13 @@ ix86_conditional_register_usage (void)
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+  /* If AVX512F is disabled, disable the mask and extended EVEX SSE registers.  */
+ if (! TARGET_AVX512F)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MASK_REGS], i)
+ || TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
@@ -4242,8 +4293,13 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
IX86_ATTR_ISA ("tbm", OPT_mtbm),
IX86_ATTR_ISA ("aes", OPT_maes),
+ IX86_ATTR_ISA ("sha", OPT_msha),
IX86_ATTR_ISA ("avx", OPT_mavx),
IX86_ATTR_ISA ("avx2", OPT_mavx2),
+ IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
+ IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
+ IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
+ IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
@@ -6014,7 +6070,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies offset
- of the offset in bits modulo 256 to avoid overflow cases.
+ of the offset in bits modulo 512 to avoid overflow cases.
See the x86-64 PS ABI for details.
*/
@@ -6042,8 +6098,8 @@ classify_argument (enum machine_mode mode, const_tree type,
tree field;
enum x86_64_reg_class subclasses[MAX_CLASSES];
- /* On x86-64 we pass structures larger than 32 bytes on the stack. */
- if (bytes > 32)
+ /* On x86-64 we pass structures larger than 64 bytes on the stack. */
+ if (bytes > 64)
return 0;
for (i = 0; i < words; i++)
@@ -6114,7 +6170,7 @@ classify_argument (enum machine_mode mode, const_tree type,
num = classify_argument (TYPE_MODE (type), type,
subclasses,
(int_bit_position (field)
- + bit_offset) % 256);
+ + bit_offset) % 512);
if (!num)
return 0;
pos = (int_bit_position (field)
@@ -6364,6 +6420,21 @@ classify_argument (enum machine_mode mode, const_tree type,
classes[2] = X86_64_SSEUP_CLASS;
classes[3] = X86_64_SSEUP_CLASS;
return 4;
+ case V8DFmode:
+ case V16SFmode:
+ case V8DImode:
+ case V16SImode:
+ case V32HImode:
+ case V64QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ classes[2] = X86_64_SSEUP_CLASS;
+ classes[3] = X86_64_SSEUP_CLASS;
+ classes[4] = X86_64_SSEUP_CLASS;
+ classes[5] = X86_64_SSEUP_CLASS;
+ classes[6] = X86_64_SSEUP_CLASS;
+ classes[7] = X86_64_SSEUP_CLASS;
+ return 8;
case V4SFmode:
case V4SImode:
case V16QImode:
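
For illustration of the new eightbyte classification, a minimal user-level sketch (assuming -mavx512f on the SysV x86-64 ABI; the function name is hypothetical).  A 64-byte vector is one X86_64_SSE_CLASS plus seven X86_64_SSEUP_CLASS eightbytes, so it travels in a single %zmm register:

#include <immintrin.h>

__m512d
add_zmm (__m512d a, __m512d b)	/* a arrives in %zmm0, b in %zmm1 */
{
  return _mm512_add_pd (a, b);	/* result is returned in %zmm0 */
}
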
@@ -6549,6 +6620,18 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
&& mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
SSE_REGNO (sse_regno));
+ if (n == 8
+ && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && regclass[4] == X86_64_SSEUP_CLASS
+ && regclass[5] == X86_64_SSEUP_CLASS
+ && regclass[6] == X86_64_SSEUP_CLASS
+ && regclass[7] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_reg_or_parallel (mode, orig_mode,
+ SSE_REGNO (sse_regno));
if (n == 2
&& regclass[0] == X86_64_X87_CLASS
&& regclass[1] == X86_64_X87UP_CLASS)
@@ -6630,6 +6713,18 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
tmpmode = OImode;
i += 3;
break;
+ case 8:
+ gcc_assert (i == 0
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && regclass[4] == X86_64_SSEUP_CLASS
+ && regclass[5] == X86_64_SSEUP_CLASS
+ && regclass[6] == X86_64_SSEUP_CLASS
+ && regclass[7] == X86_64_SSEUP_CLASS);
+ tmpmode = XImode;
+ i += 7;
+ break;
default:
gcc_unreachable ();
}
@@ -6703,6 +6798,12 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V8SFmode:
case V8SImode:
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
case V32QImode:
case V16HImode:
case V4DFmode:
@@ -6754,8 +6855,9 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
{
int int_nregs, sse_nregs;
- /* Unnamed 256bit vector mode parameters are passed on stack. */
- if (!named && VALID_AVX256_REG_MODE (mode))
+  /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack.  */
+ if (!named && (VALID_AVX512F_REG_MODE (mode)
+ || VALID_AVX256_REG_MODE (mode)))
return;
if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
@@ -6906,9 +7008,16 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
break;
case OImode:
- /* OImode shouldn't be used directly. */
+ case XImode:
+ /* OImode and XImode shouldn't be used directly. */
gcc_unreachable ();
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
case V8SFmode:
case V8SImode:
case V32QImode:
@@ -6971,7 +7080,13 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V16HImode:
case V4DFmode:
case V4DImode:
- /* Unnamed 256bit vector mode parameters are passed on stack. */
+ case V16SFmode:
+ case V16SImode:
+ case V64QImode:
+ case V32HImode:
+ case V8DFmode:
+ case V8DImode:
+      /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
if (!named)
return NULL;
break;
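
A sketch of the unnamed-argument rule above (hedged: the helper is hypothetical, and -mavx512f is assumed).  A variadic 512-bit vector argument is fetched from the stack, never from a %zmm register:

#include <immintrin.h>
#include <stdarg.h>

double
first_lane (int n, ...)
{
  va_list ap;
  __m512d v;
  double d;

  va_start (ap, n);
  v = va_arg (ap, __m512d);	/* read from the stack, per the rule above */
  d = v[0];			/* GNU vector subscripting */
  va_end (ap);
  return d;
}
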
@@ -7367,6 +7482,10 @@ function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
regno = FIRST_SSE_REG;
+ /* 64-byte vector modes in %zmm0. */
+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
+ regno = FIRST_SSE_REG;
+
/* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
regno = FIRST_FLOAT_REG;
@@ -7574,6 +7693,10 @@ return_in_memory_32 (const_tree type, enum machine_mode mode)
/* AVX values are returned in YMM0, except when it doesn't exist. */
if (size == 32)
return !TARGET_AVX;
+
+      /* AVX512F values are returned in ZMM0, except when AVX512F is not available.  */
+ if (size == 64)
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -8110,7 +8233,13 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
case V16HImode:
case V4DFmode:
case V4DImode:
- /* Unnamed 256bit vector mode parameters are passed on stack. */
+ case V16SFmode:
+ case V16SImode:
+ case V64QImode:
+ case V32HImode:
+ case V8DFmode:
+ case V8DImode:
+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
if (!TARGET_64BIT_MS_ABI)
{
container = NULL;
@@ -8525,6 +8654,12 @@ standard_sse_constant_p (rtx x)
case V4DImode:
if (TARGET_AVX2)
return 2;
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ return 2;
default:
break;
}
@@ -8543,6 +8678,11 @@ standard_sse_constant_opcode (rtx insn, rtx x)
case 1:
switch (get_attr_mode (insn))
{
+ case MODE_XI:
+ case MODE_V16SF:
+ return "vpxord\t%g0, %g0, %g0";
+ case MODE_V8DF:
+ return "vpxorq\t%g0, %g0, %g0";
case MODE_TI:
return "%vpxor\t%0, %d0";
case MODE_V2DF:
@@ -8562,6 +8702,10 @@ standard_sse_constant_opcode (rtx insn, rtx x)
}
case 2:
+ if (get_attr_mode (insn) == MODE_XI
+ || get_attr_mode (insn) == MODE_V8DF
+ || get_attr_mode (insn) == MODE_V16SF)
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
if (TARGET_AVX)
return "vpcmpeqd\t%0, %0, %0";
else
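
To make the new constant-materialization opcodes concrete, a minimal sketch (assuming -mavx512f; the exact code generated depends on the compiler version, so treat the expected instructions as illustrative):

#include <immintrin.h>

__m512i
zeros (void)	/* expected: vpxord %zmm0, %zmm0, %zmm0 */
{
  return _mm512_setzero_si512 ();
}

__m512i
ones (void)	/* expected: vpternlogd $0xFF, %zmm0, %zmm0, %zmm0 */
{
  return _mm512_set1_epi32 (-1);
}
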
@@ -14077,6 +14221,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
If CODE is 'q', pretend the mode is DImode.
If CODE is 'x', pretend the mode is V4SFmode.
If CODE is 't', pretend the mode is V8SFmode.
+ If CODE is 'g', pretend the mode is V16SFmode.
If CODE is 'h', pretend the reg is the 'high' byte register.
If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
If CODE is 'd', duplicate the operand for AVX instruction.
@@ -14122,6 +14267,8 @@ print_reg (rtx x, int code, FILE *file)
code = 16;
else if (code == 't')
code = 32;
+ else if (code == 'g')
+ code = 64;
else
code = GET_MODE_SIZE (GET_MODE (x));
@@ -14169,7 +14316,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x))
+ if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -14195,6 +14342,14 @@ print_reg (rtx x, int code, FILE *file)
fputs (hi_reg_name[regno] + 1, file);
return;
}
+ case 64:
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('z', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
break;
default:
gcc_unreachable ();
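
A hypothetical inline-asm probe of the new 'g' print code (%g0 prints the V16SFmode, i.e. zmm, name of operand 0).  Operand modifiers are internal and not a stable user interface, so this is illustrative only:

typedef float v16sf __attribute__ ((vector_size (64)));

v16sf
zero_v16sf (void)
{
  v16sf v;
  __asm__ ("vpxord %g0, %g0, %g0" : "=x" (v));	/* %g0 -> %zmmN */
  return v;
}
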
@@ -14268,6 +14423,7 @@ get_some_local_dynamic_name (void)
q -- likewise, print the DImode name of the register.
x -- likewise, print the V4SFmode name of the register.
t -- likewise, print the V8SFmode name of the register.
+ g -- likewise, print the V16SFmode name of the register.
h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
y -- print "st(0)" instead of "st" as a register.
d -- print duplicated register operand for AVX instruction.
@@ -14497,6 +14653,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
case 'q':
case 'h':
case 't':
+ case 'g':
case 'y':
case 'x':
case 'X':
@@ -14695,6 +14852,43 @@ ix86_print_operand (FILE *file, rtx x, int code)
/* We do not want to print value of the operand. */
return;
+ case 'N':
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ fputs ("{z}", file);
+ return;
+
+ case 'R':
+ gcc_assert (CONST_INT_P (x));
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ fputs (", ", file);
+
+ switch (INTVAL (x))
+ {
+ case ROUND_NEAREST_INT:
+ fputs ("{rn-sae}", file);
+ break;
+ case ROUND_NEG_INF:
+ fputs ("{rd-sae}", file);
+ break;
+ case ROUND_POS_INF:
+ fputs ("{ru-sae}", file);
+ break;
+ case ROUND_ZERO:
+ fputs ("{rz-sae}", file);
+ break;
+ case ROUND_SAE:
+ fputs ("{sae}", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fputs (", ", file);
+
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
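
The 'R' code prints the embedded-rounding suffixes listed above.  A user-level sketch that exercises the {rd-sae} case (assuming -mavx512f; the rounding-mode macros are the standard _MM_FROUND_* names):

#include <immintrin.h>

__m512d
add_round_down (__m512d a, __m512d b)
{
  /* Immediate selects round-toward-negative-infinity with SAE,
     printed as the {rd-sae} suffix by the 'R' code.  */
  return _mm512_add_round_pd (a, b,
			      _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
}
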
@@ -14805,6 +14999,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
size = "XMMWORD";
break;
case 32: size = "YMMWORD"; break;
+ case 64: size = "ZMMWORD"; break;
default:
gcc_unreachable ();
}
@@ -16371,8 +16566,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- load_unaligned = gen_avx_loaddqu256;
- store_unaligned = gen_avx_storedqu256;
+ load_unaligned = gen_avx_loaddquv32qi;
+ store_unaligned = gen_avx_storedquv32qi;
mode = V16QImode;
break;
case V8SFmode:
@@ -16475,10 +16670,56 @@ void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
op0 = operands[0];
op1 = operands[1];
+ if (GET_MODE_SIZE (mode) == 64)
+ {
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ op0 = gen_lowpart (V16SImode, op0);
+ op1 = gen_lowpart (V16SImode, op1);
+ /* FALLTHRU */
+
+ case MODE_VECTOR_FLOAT:
+ switch (GET_MODE (op0))
+ {
+ default:
+ gcc_unreachable ();
+ case V16SImode:
+ load_unaligned = gen_avx512f_loaddquv16si;
+ store_unaligned = gen_avx512f_storedquv16si;
+ break;
+ case V16SFmode:
+ load_unaligned = gen_avx512f_loadups512;
+ store_unaligned = gen_avx512f_storeups512;
+ break;
+ case V8DFmode:
+ load_unaligned = gen_avx512f_loadupd512;
+ store_unaligned = gen_avx512f_storeupd512;
+ break;
+ }
+
+ if (MEM_P (op1))
+ emit_insn (load_unaligned (op0, op1));
+ else if (MEM_P (op0))
+ emit_insn (store_unaligned (op0, op1));
+ else
+ gcc_unreachable ();
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
if (TARGET_AVX
&& GET_MODE_SIZE (mode) == 32)
{
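
A sketch of the unaligned 64-byte path wired up above (assuming -mavx512f; which pattern is selected depends on the mode class, as in the switch):

#include <immintrin.h>

void
copy16f (float *dst, const float *src)	/* no alignment assumed */
{
  /* loadups512/storeups512-style patterns handle the misaligned case.  */
  _mm512_storeu_ps (dst, _mm512_loadu_ps (src));
}
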
@@ -16511,7 +16752,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_loaddqu (op0, op1));
+ emit_insn (gen_sse2_loaddquv16qi (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -16586,7 +16827,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_storedqu (op0, op1));
+ emit_insn (gen_sse2_storedquv16qi (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -18128,6 +18369,11 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
enum machine_mode fltmode = GET_MODE (target);
rtx (*cvt) (rtx, rtx);
+ if (intmode == V16SImode)
+ {
+ emit_insn (gen_ufloatv16siv16sf2 (target, val));
+ return;
+ }
if (intmode == V4SImode)
cvt = gen_floatv4siv4sf2;
else
@@ -18218,17 +18464,23 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
switch (mode)
{
+ case V64QImode:
case V32QImode:
case V16QImode:
+ case V32HImode:
case V16HImode:
case V8HImode:
+ case V16SImode:
case V8SImode:
case V4SImode:
+ case V8DImode:
case V4DImode:
case V2DImode:
gcc_assert (vect);
+ case V16SFmode:
case V8SFmode:
case V4SFmode:
+ case V8DFmode:
case V4DFmode:
case V2DFmode:
n_elt = GET_MODE_NUNITS (mode);
@@ -18265,6 +18517,8 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
/* Find the sign bit, sign extended to 2*HWI. */
switch (mode)
{
+ case V16SImode:
+ case V16SFmode:
case V8SImode:
case V4SImode:
case V8SFmode:
@@ -18275,8 +18529,10 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
lo = 0x80000000, hi = lo < 0;
break;
+ case V8DImode:
case V4DImode:
case V2DImode:
+ case V8DFmode:
case V4DFmode:
case V2DFmode:
vec_mode = mode;
@@ -20138,22 +20394,63 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
rtx op_true, rtx op_false)
{
enum machine_mode mode = GET_MODE (dest);
- enum machine_mode cmp_mode = GET_MODE (cmp_op0);
+ enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
+
+  /* In the general case, the mode of the comparison result can differ
+     from that of the operands.  */
+ enum machine_mode cmp_mode;
+
+  /* In AVX512F the result of a comparison is an integer mask.  */
+ bool maskcmp = false;
rtx x;
- cmp_op0 = force_reg (cmp_mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, cmp_mode))
- cmp_op1 = force_reg (cmp_mode, cmp_op1);
+ if (GET_MODE_SIZE (cmp_ops_mode) == 64)
+ {
+ cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
+ gcc_assert (cmp_mode != BLKmode);
+
+ maskcmp = true;
+ }
+ else
+ cmp_mode = cmp_ops_mode;
+
+ cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
+ cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
if (optimize
|| reg_overlap_mentioned_p (dest, op_true)
|| reg_overlap_mentioned_p (dest, op_false))
- dest = gen_reg_rtx (mode);
+ dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
+  /* In AVX512F only, the compare patterns for integer modes are unspecs.  */
+ if (maskcmp && (code == GT || code == EQ))
+ {
+ rtx (*gen)(rtx, rtx, rtx);
+
+ switch (cmp_ops_mode)
+ {
+ case V16SImode:
+ gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
+ break;
+ case V8DImode:
+ gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
+ break;
+ default:
+ gen = NULL;
+ }
+
+ if (gen)
+ {
+ emit_insn (gen (dest, cmp_op0, cmp_op1));
+ return dest;
+ }
+ }
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
- if (cmp_mode != mode)
+
+ if (cmp_mode != mode && !maskcmp)
{
- x = force_reg (cmp_mode, x);
+ x = force_reg (cmp_ops_mode, x);
convert_move (dest, x, false);
}
else
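
The mask-compare behaviour above is visible at the intrinsic level: with 512-bit operands a compare yields a k-register mask, not a vector of per-lane 0/-1 values.  A minimal sketch (assuming -mavx512f):

#include <immintrin.h>

__mmask16
gt_mask (__m512i a, __m512i b)
{
  return _mm512_cmpgt_epi32_mask (a, b);	/* 16-bit mask, one bit per lane */
}
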
@@ -20169,33 +20466,43 @@ static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
enum machine_mode mode = GET_MODE (dest);
+ enum machine_mode cmpmode = GET_MODE (cmp);
+
+  /* In AVX512F the result of a comparison is an integer mask.  */
+ bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
+
rtx t2, t3, x;
if (vector_all_ones_operand (op_true, mode)
- && rtx_equal_p (op_false, CONST0_RTX (mode)))
+ && rtx_equal_p (op_false, CONST0_RTX (mode))
+ && !maskcmp)
{
emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
}
- else if (op_false == CONST0_RTX (mode))
+ else if (op_false == CONST0_RTX (mode)
+ && !maskcmp)
{
op_true = force_reg (mode, op_true);
x = gen_rtx_AND (mode, cmp, op_true);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (op_true == CONST0_RTX (mode))
+ else if (op_true == CONST0_RTX (mode)
+ && !maskcmp)
{
op_false = force_reg (mode, op_false);
x = gen_rtx_NOT (mode, cmp);
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
+ else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
+ && !maskcmp)
{
op_false = force_reg (mode, op_false);
x = gen_rtx_IOR (mode, cmp, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (TARGET_XOP)
+ else if (TARGET_XOP
+ && !maskcmp)
{
op_true = force_reg (mode, op_true);
@@ -20260,6 +20567,20 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
cmp = gen_lowpart (V32QImode, cmp);
}
break;
+
+ case V16SImode:
+ gen = gen_avx512f_blendmv16si;
+ break;
+ case V8DImode:
+ gen = gen_avx512f_blendmv8di;
+ break;
+ case V8DFmode:
+ gen = gen_avx512f_blendmv8df;
+ break;
+ case V16SFmode:
+ gen = gen_avx512f_blendmv16sf;
+ break;
+
default:
break;
}
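
The blendm patterns implement a mask-driven per-lane select, which is how the 512-bit conditional moves above are expanded.  A hedged user-level sketch (assuming -mavx512f; note _mm512_mask_blend_epi32 (k, a, b) takes b where k is set):

#include <immintrin.h>

__m512i
min_epi32 (__m512i a, __m512i b)
{
  __mmask16 lt = _mm512_cmplt_epi32_mask (a, b);
  return _mm512_mask_blend_epi32 (lt, b, a);	/* lt ? a : b per lane */
}
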
@@ -20523,6 +20844,8 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
+ case V16SImode:
+ case V8DImode:
case V8SImode:
case V4DImode:
case V4SImode:
@@ -20533,6 +20856,8 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
+ case V16SImode: gen_sub3 = gen_subv16si3; break;
+ case V8DImode: gen_sub3 = gen_subv8di3; break;
case V8SImode: gen_sub3 = gen_subv8si3; break;
case V4DImode: gen_sub3 = gen_subv4di3; break;
case V4SImode: gen_sub3 = gen_subv4si3; break;
@@ -20589,7 +20914,8 @@ ix86_expand_int_vcond (rtx operands[])
x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
code, cop0, cop1,
operands[1+negate], operands[2-negate]);
- x = gen_lowpart (data_mode, x);
+ if (GET_MODE (x) == mode)
+ x = gen_lowpart (data_mode, x);
}
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
@@ -20597,6 +20923,35 @@ ix86_expand_int_vcond (rtx operands[])
return true;
}
+static void
+ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ switch (mode)
+ {
+ case V16SImode:
+ emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
+ force_reg (V16SImode, mask),
+ op1));
+ return;
+ case V16SFmode:
+ emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
+ force_reg (V16SImode, mask),
+ op1));
+ return;
+ case V8DImode:
+ emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
+ force_reg (V8DImode, mask), op1));
+ return;
+ case V8DFmode:
+ emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
+ force_reg (V8DImode, mask), op1));
+ return;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Expand a variable vector permutation. */
void
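
The vpermi2var patterns used above select each result lane from the concatenation of two 512-bit sources according to an index vector.  A sketch via the corresponding intrinsic (assuming -mavx512f):

#include <immintrin.h>

__m512i
pick_lanes (__m512i a, __m512i idx, __m512i b)
{
  /* Low bits of each idx lane pick from a, the next bit selects b.  */
  return _mm512_permutex2var_epi32 (a, idx, b);
}
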
@@ -20615,7 +20970,14 @@ ix86_expand_vec_perm (rtx operands[])
/* Number of elements in the vector. */
w = GET_MODE_NUNITS (mode);
e = GET_MODE_UNIT_SIZE (mode);
- gcc_assert (w <= 32);
+ gcc_assert (w <= 64);
+
+ if (mode == V8DFmode || mode == V8DImode
+ || mode == V16SFmode || mode == V16SImode)
+ {
+ ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1);
+ return;
+ }
if (TARGET_AVX2)
{
@@ -20969,6 +21331,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
extract
= high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
break;
+ case V32HImode:
+ if (unsigned_p)
+ unpack = gen_avx512f_zero_extendv16hiv16si2;
+ else
+ unpack = gen_avx512f_sign_extendv16hiv16si2;
+ halfmode = V16HImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
+ break;
case V16HImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv8hiv8si2;
@@ -20978,6 +21349,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
extract
= high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
break;
+ case V16SImode:
+ if (unsigned_p)
+ unpack = gen_avx512f_zero_extendv8siv8di2;
+ else
+ unpack = gen_avx512f_sign_extendv8siv8di2;
+ halfmode = V8SImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
+ break;
case V8SImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv4siv4di2;
@@ -21009,7 +21389,7 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
gcc_unreachable ();
}
- if (GET_MODE_SIZE (imode) == 32)
+ if (GET_MODE_SIZE (imode) >= 32)
{
tmp = gen_reg_rtx (halfmode);
emit_insn (extract (tmp, src));
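
For 512-bit modes the unpack expansion above extracts a 256-bit half and widens it.  A sketch of the low-half, signed case (assuming -mavx512f; the intrinsic maps to the sign_extend pattern registered above):

#include <immintrin.h>

__m512i
widen_lo_epi16 (__m256i half)
{
  return _mm512_cvtepi16_epi32 (half);	/* vpmovsxwd: 16 words -> 16 dwords */
}
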
@@ -25364,7 +25744,8 @@ ix86_constant_alignment (tree exp, int align)
int
ix86_data_alignment (tree type, int align, bool opt)
{
- int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+ int max_align = optimize_size ? BITS_PER_WORD
+ : MIN (512, MAX_OFILE_ALIGNMENT);
if (opt
&& AGGREGATE_TYPE_P (type)
@@ -26821,12 +27202,421 @@ enum ix86_builtins
IX86_BUILTIN_GATHERDIV4SI,
IX86_BUILTIN_GATHERDIV8SI,
- /* Alternate 4 element gather for the vectorizer where
- all operands are 32-byte wide. */
+ /* AVX512F */
+ IX86_BUILTIN_ADDPD512,
+ IX86_BUILTIN_ADDPS512,
+ IX86_BUILTIN_ADDSD_MASK,
+ IX86_BUILTIN_ADDSS_MASK,
+ IX86_BUILTIN_ALIGND512,
+ IX86_BUILTIN_ALIGNQ512,
+ IX86_BUILTIN_BLENDMD512,
+ IX86_BUILTIN_BLENDMPD512,
+ IX86_BUILTIN_BLENDMPS512,
+ IX86_BUILTIN_BLENDMQ512,
+ IX86_BUILTIN_BROADCASTF32X4_512,
+ IX86_BUILTIN_BROADCASTF64X4_512,
+ IX86_BUILTIN_BROADCASTI32X4_512,
+ IX86_BUILTIN_BROADCASTI64X4_512,
+ IX86_BUILTIN_BROADCASTSD512,
+ IX86_BUILTIN_BROADCASTSS512,
+ IX86_BUILTIN_CMPD512,
+ IX86_BUILTIN_CMPPD512,
+ IX86_BUILTIN_CMPPS512,
+ IX86_BUILTIN_CMPQ512,
+ IX86_BUILTIN_CMPSD_MASK,
+ IX86_BUILTIN_CMPSS_MASK,
+ IX86_BUILTIN_COMIDF,
+ IX86_BUILTIN_COMISF,
+ IX86_BUILTIN_COMPRESSPD512,
+ IX86_BUILTIN_COMPRESSPDSTORE512,
+ IX86_BUILTIN_COMPRESSPS512,
+ IX86_BUILTIN_COMPRESSPSSTORE512,
+ IX86_BUILTIN_CVTDQ2PD512,
+ IX86_BUILTIN_CVTDQ2PS512,
+ IX86_BUILTIN_CVTPD2DQ512,
+ IX86_BUILTIN_CVTPD2PS512,
+ IX86_BUILTIN_CVTPD2UDQ512,
+ IX86_BUILTIN_CVTPH2PS512,
+ IX86_BUILTIN_CVTPS2DQ512,
+ IX86_BUILTIN_CVTPS2PD512,
+ IX86_BUILTIN_CVTPS2PH512,
+ IX86_BUILTIN_CVTPS2UDQ512,
+ IX86_BUILTIN_CVTSD2SS_MASK,
+ IX86_BUILTIN_CVTSI2SD64,
+ IX86_BUILTIN_CVTSI2SS32,
+ IX86_BUILTIN_CVTSI2SS64,
+ IX86_BUILTIN_CVTSS2SD_MASK,
+ IX86_BUILTIN_CVTTPD2DQ512,
+ IX86_BUILTIN_CVTTPD2UDQ512,
+ IX86_BUILTIN_CVTTPS2DQ512,
+ IX86_BUILTIN_CVTTPS2UDQ512,
+ IX86_BUILTIN_CVTUDQ2PD512,
+ IX86_BUILTIN_CVTUDQ2PS512,
+ IX86_BUILTIN_CVTUSI2SD32,
+ IX86_BUILTIN_CVTUSI2SD64,
+ IX86_BUILTIN_CVTUSI2SS32,
+ IX86_BUILTIN_CVTUSI2SS64,
+ IX86_BUILTIN_DIVPD512,
+ IX86_BUILTIN_DIVPS512,
+ IX86_BUILTIN_DIVSD_MASK,
+ IX86_BUILTIN_DIVSS_MASK,
+ IX86_BUILTIN_EXPANDPD512,
+ IX86_BUILTIN_EXPANDPD512Z,
+ IX86_BUILTIN_EXPANDPDLOAD512,
+ IX86_BUILTIN_EXPANDPDLOAD512Z,
+ IX86_BUILTIN_EXPANDPS512,
+ IX86_BUILTIN_EXPANDPS512Z,
+ IX86_BUILTIN_EXPANDPSLOAD512,
+ IX86_BUILTIN_EXPANDPSLOAD512Z,
+ IX86_BUILTIN_EXTRACTF32X4,
+ IX86_BUILTIN_EXTRACTF64X4,
+ IX86_BUILTIN_EXTRACTI32X4,
+ IX86_BUILTIN_EXTRACTI64X4,
+ IX86_BUILTIN_FIXUPIMMPD512_MASK,
+ IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS512_MASK,
+ IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
+ IX86_BUILTIN_FIXUPIMMSD128_MASK,
+ IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
+ IX86_BUILTIN_FIXUPIMMSS128_MASK,
+ IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
+ IX86_BUILTIN_GETEXPPD512,
+ IX86_BUILTIN_GETEXPPS512,
+ IX86_BUILTIN_GETEXPSD128,
+ IX86_BUILTIN_GETEXPSS128,
+ IX86_BUILTIN_GETMANTPD512,
+ IX86_BUILTIN_GETMANTPS512,
+ IX86_BUILTIN_GETMANTSD128,
+ IX86_BUILTIN_GETMANTSS128,
+ IX86_BUILTIN_INSERTF32X4,
+ IX86_BUILTIN_INSERTF64X4,
+ IX86_BUILTIN_INSERTI32X4,
+ IX86_BUILTIN_INSERTI64X4,
+ IX86_BUILTIN_LOADAPD512,
+ IX86_BUILTIN_LOADAPS512,
+ IX86_BUILTIN_LOADDQUDI512,
+ IX86_BUILTIN_LOADDQUSI512,
+ IX86_BUILTIN_LOADSD,
+ IX86_BUILTIN_LOADSS,
+ IX86_BUILTIN_LOADUPD512,
+ IX86_BUILTIN_LOADUPS512,
+ IX86_BUILTIN_MAXPD512,
+ IX86_BUILTIN_MAXPS512,
+ IX86_BUILTIN_MAXSD_MASK,
+ IX86_BUILTIN_MAXSS_MASK,
+ IX86_BUILTIN_MINPD512,
+ IX86_BUILTIN_MINPS512,
+ IX86_BUILTIN_MINSD_MASK,
+ IX86_BUILTIN_MINSS_MASK,
+ IX86_BUILTIN_MOVAPD512,
+ IX86_BUILTIN_MOVAPS512,
+ IX86_BUILTIN_MOVDDUP512,
+ IX86_BUILTIN_MOVDQA32LOAD512,
+ IX86_BUILTIN_MOVDQA32STORE512,
+ IX86_BUILTIN_MOVDQA32_512,
+ IX86_BUILTIN_MOVDQA64LOAD512,
+ IX86_BUILTIN_MOVDQA64STORE512,
+ IX86_BUILTIN_MOVDQA64_512,
+ IX86_BUILTIN_MOVESD,
+ IX86_BUILTIN_MOVESS,
+ IX86_BUILTIN_MOVNTDQ512,
+ IX86_BUILTIN_MOVNTPD512,
+ IX86_BUILTIN_MOVNTPS512,
+ IX86_BUILTIN_MOVSHDUP512,
+ IX86_BUILTIN_MOVSLDUP512,
+ IX86_BUILTIN_MULPD512,
+ IX86_BUILTIN_MULPS512,
+ IX86_BUILTIN_MULSD_MASK,
+ IX86_BUILTIN_MULSS_MASK,
+ IX86_BUILTIN_PABSD512,
+ IX86_BUILTIN_PABSQ512,
+ IX86_BUILTIN_PADDD512,
+ IX86_BUILTIN_PADDQ512,
+ IX86_BUILTIN_PANDD512,
+ IX86_BUILTIN_PANDND512,
+ IX86_BUILTIN_PANDNQ512,
+ IX86_BUILTIN_PANDQ512,
+ IX86_BUILTIN_PBROADCASTD512,
+ IX86_BUILTIN_PBROADCASTD512_GPR,
+ IX86_BUILTIN_PBROADCASTMB512,
+ IX86_BUILTIN_PBROADCASTMW512,
+ IX86_BUILTIN_PBROADCASTQ512,
+ IX86_BUILTIN_PBROADCASTQ512_GPR,
+ IX86_BUILTIN_PBROADCASTQ512_MEM,
+ IX86_BUILTIN_PCMPEQD512_MASK,
+ IX86_BUILTIN_PCMPEQQ512_MASK,
+ IX86_BUILTIN_PCMPGTD512_MASK,
+ IX86_BUILTIN_PCMPGTQ512_MASK,
+ IX86_BUILTIN_PCOMPRESSD512,
+ IX86_BUILTIN_PCOMPRESSDSTORE512,
+ IX86_BUILTIN_PCOMPRESSQ512,
+ IX86_BUILTIN_PCOMPRESSQSTORE512,
+ IX86_BUILTIN_PEXPANDD512,
+ IX86_BUILTIN_PEXPANDD512Z,
+ IX86_BUILTIN_PEXPANDDLOAD512,
+ IX86_BUILTIN_PEXPANDDLOAD512Z,
+ IX86_BUILTIN_PEXPANDQ512,
+ IX86_BUILTIN_PEXPANDQ512Z,
+ IX86_BUILTIN_PEXPANDQLOAD512,
+ IX86_BUILTIN_PEXPANDQLOAD512Z,
+ IX86_BUILTIN_PMAXSD512,
+ IX86_BUILTIN_PMAXSQ512,
+ IX86_BUILTIN_PMAXUD512,
+ IX86_BUILTIN_PMAXUQ512,
+ IX86_BUILTIN_PMINSD512,
+ IX86_BUILTIN_PMINSQ512,
+ IX86_BUILTIN_PMINUD512,
+ IX86_BUILTIN_PMINUQ512,
+ IX86_BUILTIN_PMOVDB512,
+ IX86_BUILTIN_PMOVDW512,
+ IX86_BUILTIN_PMOVQB512,
+ IX86_BUILTIN_PMOVQD512,
+ IX86_BUILTIN_PMOVQW512,
+ IX86_BUILTIN_PMOVSDB512,
+ IX86_BUILTIN_PMOVSDW512,
+ IX86_BUILTIN_PMOVSQB512,
+ IX86_BUILTIN_PMOVSQD512,
+ IX86_BUILTIN_PMOVSQW512,
+ IX86_BUILTIN_PMOVSXBD512,
+ IX86_BUILTIN_PMOVSXBQ512,
+ IX86_BUILTIN_PMOVSXDQ512,
+ IX86_BUILTIN_PMOVSXWD512,
+ IX86_BUILTIN_PMOVSXWQ512,
+ IX86_BUILTIN_PMOVUSDB512,
+ IX86_BUILTIN_PMOVUSDW512,
+ IX86_BUILTIN_PMOVUSQB512,
+ IX86_BUILTIN_PMOVUSQD512,
+ IX86_BUILTIN_PMOVUSQW512,
+ IX86_BUILTIN_PMOVZXBD512,
+ IX86_BUILTIN_PMOVZXBQ512,
+ IX86_BUILTIN_PMOVZXDQ512,
+ IX86_BUILTIN_PMOVZXWD512,
+ IX86_BUILTIN_PMOVZXWQ512,
+ IX86_BUILTIN_PMULDQ512,
+ IX86_BUILTIN_PMULLD512,
+ IX86_BUILTIN_PMULUDQ512,
+ IX86_BUILTIN_PORD512,
+ IX86_BUILTIN_PORQ512,
+ IX86_BUILTIN_PROLD512,
+ IX86_BUILTIN_PROLQ512,
+ IX86_BUILTIN_PROLVD512,
+ IX86_BUILTIN_PROLVQ512,
+ IX86_BUILTIN_PRORD512,
+ IX86_BUILTIN_PRORQ512,
+ IX86_BUILTIN_PRORVD512,
+ IX86_BUILTIN_PRORVQ512,
+ IX86_BUILTIN_PSHUFD512,
+ IX86_BUILTIN_PSLLD512,
+ IX86_BUILTIN_PSLLDI512,
+ IX86_BUILTIN_PSLLQ512,
+ IX86_BUILTIN_PSLLQI512,
+ IX86_BUILTIN_PSLLVV16SI,
+ IX86_BUILTIN_PSLLVV8DI,
+ IX86_BUILTIN_PSRAD512,
+ IX86_BUILTIN_PSRADI512,
+ IX86_BUILTIN_PSRAQ512,
+ IX86_BUILTIN_PSRAQI512,
+ IX86_BUILTIN_PSRAVV16SI,
+ IX86_BUILTIN_PSRAVV8DI,
+ IX86_BUILTIN_PSRLD512,
+ IX86_BUILTIN_PSRLDI512,
+ IX86_BUILTIN_PSRLQ512,
+ IX86_BUILTIN_PSRLQI512,
+ IX86_BUILTIN_PSRLVV16SI,
+ IX86_BUILTIN_PSRLVV8DI,
+ IX86_BUILTIN_PSUBD512,
+ IX86_BUILTIN_PSUBQ512,
+ IX86_BUILTIN_PTESTMD512,
+ IX86_BUILTIN_PTESTMQ512,
+ IX86_BUILTIN_PTESTNMD512,
+ IX86_BUILTIN_PTESTNMQ512,
+ IX86_BUILTIN_PUNPCKHDQ512,
+ IX86_BUILTIN_PUNPCKHQDQ512,
+ IX86_BUILTIN_PUNPCKLDQ512,
+ IX86_BUILTIN_PUNPCKLQDQ512,
+ IX86_BUILTIN_PXORD512,
+ IX86_BUILTIN_PXORQ512,
+ IX86_BUILTIN_RCP14PD512,
+ IX86_BUILTIN_RCP14PS512,
+ IX86_BUILTIN_RCP14SD,
+ IX86_BUILTIN_RCP14SS,
+ IX86_BUILTIN_RNDSCALEPD,
+ IX86_BUILTIN_RNDSCALEPS,
+ IX86_BUILTIN_RNDSCALESD,
+ IX86_BUILTIN_RNDSCALESS,
+ IX86_BUILTIN_RSQRT14PD512,
+ IX86_BUILTIN_RSQRT14PS512,
+ IX86_BUILTIN_RSQRT14SD,
+ IX86_BUILTIN_RSQRT14SS,
+ IX86_BUILTIN_SCALEFPD512,
+ IX86_BUILTIN_SCALEFPS512,
+ IX86_BUILTIN_SCALEFSD,
+ IX86_BUILTIN_SCALEFSS,
+ IX86_BUILTIN_SHUFPD512,
+ IX86_BUILTIN_SHUFPS512,
+ IX86_BUILTIN_SHUF_F32x4,
+ IX86_BUILTIN_SHUF_F64x2,
+ IX86_BUILTIN_SHUF_I32x4,
+ IX86_BUILTIN_SHUF_I64x2,
+ IX86_BUILTIN_SQRTPD512,
+ IX86_BUILTIN_SQRTPD512_MASK,
+ IX86_BUILTIN_SQRTPS512_MASK,
+ IX86_BUILTIN_SQRTPS_NR512,
+ IX86_BUILTIN_SQRTSD_MASK,
+ IX86_BUILTIN_SQRTSS_MASK,
+ IX86_BUILTIN_STOREAPD512,
+ IX86_BUILTIN_STOREAPS512,
+ IX86_BUILTIN_STOREDQUDI512,
+ IX86_BUILTIN_STOREDQUSI512,
+ IX86_BUILTIN_STORESD,
+ IX86_BUILTIN_STORESS,
+ IX86_BUILTIN_STOREUPD512,
+ IX86_BUILTIN_STOREUPS512,
+ IX86_BUILTIN_SUBPD512,
+ IX86_BUILTIN_SUBPS512,
+ IX86_BUILTIN_SUBSD_MASK,
+ IX86_BUILTIN_SUBSS_MASK,
+ IX86_BUILTIN_UCMPD512,
+ IX86_BUILTIN_UCMPQ512,
+ IX86_BUILTIN_UNPCKHPD512,
+ IX86_BUILTIN_UNPCKHPS512,
+ IX86_BUILTIN_UNPCKLPD512,
+ IX86_BUILTIN_UNPCKLPS512,
+ IX86_BUILTIN_VCVTSD2SI32,
+ IX86_BUILTIN_VCVTSD2SI64,
+ IX86_BUILTIN_VCVTSD2USI32,
+ IX86_BUILTIN_VCVTSD2USI64,
+ IX86_BUILTIN_VCVTSS2SI32,
+ IX86_BUILTIN_VCVTSS2SI64,
+ IX86_BUILTIN_VCVTSS2USI32,
+ IX86_BUILTIN_VCVTSS2USI64,
+ IX86_BUILTIN_VCVTTSD2SI32,
+ IX86_BUILTIN_VCVTTSD2SI64,
+ IX86_BUILTIN_VCVTTSD2USI32,
+ IX86_BUILTIN_VCVTTSD2USI64,
+ IX86_BUILTIN_VCVTTSS2SI32,
+ IX86_BUILTIN_VCVTTSS2SI64,
+ IX86_BUILTIN_VCVTTSS2USI32,
+ IX86_BUILTIN_VCVTTSS2USI64,
+ IX86_BUILTIN_VFMADDPD512_MASK,
+ IX86_BUILTIN_VFMADDPD512_MASK3,
+ IX86_BUILTIN_VFMADDPD512_MASKZ,
+ IX86_BUILTIN_VFMADDPS512_MASK,
+ IX86_BUILTIN_VFMADDPS512_MASK3,
+ IX86_BUILTIN_VFMADDPS512_MASKZ,
+ IX86_BUILTIN_VFMADDSD3_MASK,
+ IX86_BUILTIN_VFMADDSD3_MASK3,
+ IX86_BUILTIN_VFMADDSD3_MASKZ,
+ IX86_BUILTIN_VFMADDSS3_MASK,
+ IX86_BUILTIN_VFMADDSS3_MASK3,
+ IX86_BUILTIN_VFMADDSS3_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPD512_MASK,
+ IX86_BUILTIN_VFMADDSUBPD512_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS512_MASK,
+ IX86_BUILTIN_VFMADDSUBPS512_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
+ IX86_BUILTIN_VFMSUBADDPD512_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS512_MASK3,
+ IX86_BUILTIN_VFMSUBPD512_MASK3,
+ IX86_BUILTIN_VFMSUBPS512_MASK3,
+ IX86_BUILTIN_VFMSUBSD3_MASK3,
+ IX86_BUILTIN_VFMSUBSS3_MASK3,
+ IX86_BUILTIN_VFNMADDPD512_MASK,
+ IX86_BUILTIN_VFNMADDPS512_MASK,
+ IX86_BUILTIN_VFNMSUBPD512_MASK,
+ IX86_BUILTIN_VFNMSUBPD512_MASK3,
+ IX86_BUILTIN_VFNMSUBPS512_MASK,
+ IX86_BUILTIN_VFNMSUBPS512_MASK3,
+ IX86_BUILTIN_VPCLZCNTD512,
+ IX86_BUILTIN_VPCLZCNTQ512,
+ IX86_BUILTIN_VPCONFLICTD512,
+ IX86_BUILTIN_VPCONFLICTQ512,
+ IX86_BUILTIN_VPERMDF512,
+ IX86_BUILTIN_VPERMDI512,
+ IX86_BUILTIN_VPERMI2VARD512,
+ IX86_BUILTIN_VPERMI2VARPD512,
+ IX86_BUILTIN_VPERMI2VARPS512,
+ IX86_BUILTIN_VPERMI2VARQ512,
+ IX86_BUILTIN_VPERMILPD512,
+ IX86_BUILTIN_VPERMILPS512,
+ IX86_BUILTIN_VPERMILVARPD512,
+ IX86_BUILTIN_VPERMILVARPS512,
+ IX86_BUILTIN_VPERMT2VARD512,
+ IX86_BUILTIN_VPERMT2VARD512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPD512,
+ IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS512,
+ IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARQ512,
+ IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
+ IX86_BUILTIN_VPERMVARDF512,
+ IX86_BUILTIN_VPERMVARDI512,
+ IX86_BUILTIN_VPERMVARSF512,
+ IX86_BUILTIN_VPERMVARSI512,
+ IX86_BUILTIN_VTERNLOGD512_MASK,
+ IX86_BUILTIN_VTERNLOGD512_MASKZ,
+ IX86_BUILTIN_VTERNLOGQ512_MASK,
+ IX86_BUILTIN_VTERNLOGQ512_MASKZ,
+
+ /* Mask arithmetic operations */
+ IX86_BUILTIN_KAND16,
+ IX86_BUILTIN_KANDN16,
+ IX86_BUILTIN_KNOT16,
+ IX86_BUILTIN_KOR16,
+ IX86_BUILTIN_KORTESTC16,
+ IX86_BUILTIN_KORTESTZ16,
+ IX86_BUILTIN_KUNPCKBW,
+ IX86_BUILTIN_KXNOR16,
+ IX86_BUILTIN_KXOR16,
+
+  /* Alternate 4- and 8-element gather/scatter for the vectorizer,
+     where all operands are 32 or 64 bytes wide, respectively.  */
IX86_BUILTIN_GATHERALTSIV4DF,
IX86_BUILTIN_GATHERALTDIV8SF,
IX86_BUILTIN_GATHERALTSIV4DI,
IX86_BUILTIN_GATHERALTDIV8SI,
+ IX86_BUILTIN_GATHER3ALTDIV16SF,
+ IX86_BUILTIN_GATHER3ALTDIV16SI,
+ IX86_BUILTIN_GATHER3ALTSIV8DF,
+ IX86_BUILTIN_GATHER3ALTSIV8DI,
+ IX86_BUILTIN_GATHER3DIV16SF,
+ IX86_BUILTIN_GATHER3DIV16SI,
+ IX86_BUILTIN_GATHER3DIV8DF,
+ IX86_BUILTIN_GATHER3DIV8DI,
+ IX86_BUILTIN_GATHER3SIV16SF,
+ IX86_BUILTIN_GATHER3SIV16SI,
+ IX86_BUILTIN_GATHER3SIV8DF,
+ IX86_BUILTIN_GATHER3SIV8DI,
+ IX86_BUILTIN_SCATTERDIV16SF,
+ IX86_BUILTIN_SCATTERDIV16SI,
+ IX86_BUILTIN_SCATTERDIV8DF,
+ IX86_BUILTIN_SCATTERDIV8DI,
+ IX86_BUILTIN_SCATTERSIV16SF,
+ IX86_BUILTIN_SCATTERSIV16SI,
+ IX86_BUILTIN_SCATTERSIV8DF,
+ IX86_BUILTIN_SCATTERSIV8DI,
+
+ /* AVX512PF */
+ IX86_BUILTIN_GATHERPFDPS,
+ IX86_BUILTIN_GATHERPFQPS,
+ IX86_BUILTIN_SCATTERPFDPS,
+ IX86_BUILTIN_SCATTERPFQPS,
+ IX86_BUILTIN_EXP2PD,
+ IX86_BUILTIN_EXP2PS,
+ IX86_BUILTIN_RCP28PD,
+ IX86_BUILTIN_RCP28PS,
+ IX86_BUILTIN_RSQRT28PD,
+ IX86_BUILTIN_RSQRT28PS,
+
+ /* SHA builtins. */
+ IX86_BUILTIN_SHA1MSG1,
+ IX86_BUILTIN_SHA1MSG2,
+ IX86_BUILTIN_SHA1NEXTE,
+ IX86_BUILTIN_SHA1RNDS4,
+ IX86_BUILTIN_SHA256MSG1,
+ IX86_BUILTIN_SHA256MSG2,
+ IX86_BUILTIN_SHA256RNDS2,
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
@@ -26835,10 +27625,15 @@ enum ix86_builtins
IX86_BUILTIN_COPYSIGNQ,
/* Vectorizer support builtins. */
+ IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD,
IX86_BUILTIN_CPYSGNPS256,
+ IX86_BUILTIN_CPYSGNPS512,
IX86_BUILTIN_CPYSGNPD256,
+ IX86_BUILTIN_CPYSGNPD512,
+ IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
+ IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
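
The IX86_BUILTIN_K* entries above back the mask-arithmetic intrinsics.  A sketch of combining two compare masks without leaving the k-registers (assuming -mavx512f; the function name is hypothetical):

#include <immintrin.h>

__mmask16
both_positive (__m512i a, __m512i b)
{
  __m512i zero = _mm512_setzero_si512 ();
  return _mm512_kand (_mm512_cmpgt_epi32_mask (a, zero),
		      _mm512_cmpgt_epi32_mask (b, zero));
}
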
@@ -27302,13 +28097,13 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -27337,8 +28132,8 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
@@ -27365,6 +28160,43 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadsv2df_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadsv4sf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storesv2df_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storesv4sf_mask, "__builtin_ia32_storess_mask", IX86_BUILTIN_STORESS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
@@ -27607,7 +28439,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
@@ -27955,7 +28787,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
@@ -28216,6 +29048,368 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movesv2df_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movesv4sf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
+
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
+
+ /* Mask arithmetic operations */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandhi, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kiorhi, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxorhi, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+
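(Editorial note, not part of the patch: the mask builtins above operate on
HImode values that model the new 16-bit k-registers.  A minimal sketch of
driving them directly, assuming -mavx512f; the function name is illustrative.)

typedef unsigned short mask16;	/* HImode carrier for a 16-bit k-mask.  */

mask16
combine_masks (mask16 a, mask16 b)
{
  /* HI_FTYPE_HI_HI: operands and result are all 16-bit masks.  */
  mask16 t = __builtin_ia32_kandhi (a, b);
  return __builtin_ia32_korhi (t, __builtin_ia32_knothi (a));
}
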
+ /* SHA */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
+};
+
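(Editorial note, not part of the patch: in the *_mask entries above, the
trailing operands named in each FTYPE are the merge source and the write-mask.
A minimal sketch of calling one such builtin, assuming -mavx512f; the
function name is illustrative.)

typedef int v16si __attribute__ ((vector_size (64)));

v16si
masked_add (v16si a, v16si b, v16si src, unsigned short m)
{
  /* V16SI_FTYPE_V16SI_V16SI_V16SI_HI: lanes with a zero mask bit
     take the corresponding lane of SRC.  */
  return __builtin_ia32_paddd512_mask (a, b, src, m);
}
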
+/* Builtins with rounding support. */
+static const struct builtin_description bdesc_round_args[] =
+{
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask", IX86_BUILTIN_ADDSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask", IX86_BUILTIN_ADDSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask", IX86_BUILTIN_CVTSD2SS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask", IX86_BUILTIN_CVTSS2SD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask", IX86_BUILTIN_DIVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask", IX86_BUILTIN_DIVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd128_mask", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss128_mask", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_mask_round, "__builtin_ia32_getmantsd_mask", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_mask_round, "__builtin_ia32_getmantss_mask", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask", IX86_BUILTIN_MAXSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask", IX86_BUILTIN_MAXSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask", IX86_BUILTIN_MINSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask", IX86_BUILTIN_MINSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask", IX86_BUILTIN_MULSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask", IX86_BUILTIN_MULSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask", IX86_BUILTIN_SQRTSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask", IX86_BUILTIN_SQRTSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask", IX86_BUILTIN_SUBSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_mask_round, "__builtin_ia32_subss_mask", IX86_BUILTIN_SUBSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_mask3_round, "__builtin_ia32_vfmaddsd3_mask3", IX86_BUILTIN_VFMADDSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_maskz_round, "__builtin_ia32_vfmaddsd3_maskz", IX86_BUILTIN_VFMADDSD3_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_mask_round, "__builtin_ia32_vfmaddss3_mask", IX86_BUILTIN_VFMADDSS3_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmsub_v2df_mask3_round, "__builtin_ia32_vfmsubsd3_mask3", IX86_BUILTIN_VFMSUBSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+
+ /* AVX512ER */
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
};
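
(Editorial note, not part of the patch: every bdesc_round_args entry carries
one extra trailing INT operand, the embedded rounding/SAE immediate.  A
minimal sketch, assuming -mavx512f and the _MM_FROUND_* encoding, where 0x2
rounds toward +inf and 0x8 suppresses exceptions.)

typedef double v8df __attribute__ ((vector_size (64)));

v8df
add_round_up (v8df a, v8df b, v8df src, unsigned char m)
{
  /* V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT: the last operand is the
     rounding-mode immediate.  */
  return __builtin_ia32_addpd512_mask (a, b, src, m, 0x2 | 0x8);
}
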
/* FMA4 and XOP. */
@@ -28662,6 +29856,18 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (d->mask, d->name, ftype, d->code);
}
+ /* Add all builtins with rounding. */
+ for (i = 0, d = bdesc_round_args;
+ i < ARRAY_SIZE (bdesc_round_args);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
/* pcmpestr[im] insns. */
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
@@ -28830,6 +30036,117 @@ ix86_init_mmx_sse_builtins (void)
V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
IX86_BUILTIN_GATHERALTDIV8SI);
+ /* AVX512F */
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
+ V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
+ IX86_BUILTIN_GATHER3SIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
+ V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
+ IX86_BUILTIN_GATHER3SIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
+ V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
+ V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
+ V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
+ V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
+ V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
+ V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
+ VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
+ IX86_BUILTIN_SCATTERSIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
+ VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
+ IX86_BUILTIN_SCATTERSIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
+ VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
+ IX86_BUILTIN_SCATTERDIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
+ VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
+ IX86_BUILTIN_SCATTERDIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
+ VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
+ IX86_BUILTIN_SCATTERSIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
+ VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
+ IX86_BUILTIN_SCATTERSIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
+ VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
+ IX86_BUILTIN_SCATTERDIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
+ VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
+ IX86_BUILTIN_SCATTERDIV8DI);
+
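(Editorial note, not part of the patch: the gather builtins take
(pass-through, base pointer, index vector, mask, scale), mirroring the FTYPE
strings above, and the scale must be a literal 1, 2, 4 or 8.  A minimal
sketch, assuming -mavx512f; names are illustrative.)

typedef float v16sf __attribute__ ((vector_size (64)));
typedef int v16si __attribute__ ((vector_size (64)));

v16sf
gather_f32 (v16sf src, const float *base, v16si idx, unsigned short m)
{
  /* Masked-off lanes keep SRC; each index is scaled by 4 bytes.  */
  return __builtin_ia32_gathersiv16sf (src, base, idx, m, 4);
}
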
+ /* AVX512PF */
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
+ VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
+ IX86_BUILTIN_GATHERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
+ VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
+ IX86_BUILTIN_GATHERPFQPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
+ VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
+ IX86_BUILTIN_SCATTERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
+ VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
+ IX86_BUILTIN_SCATTERPFQPS);
+
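(Editorial note, not part of the patch: the AVX-512PF builtins are void
prefetches taking (mask, index vector, base, scale, hint).  A minimal sketch,
assuming -mavx512pf and that hint 3 requests the nearest cache level as with
_MM_HINT_T0; names are illustrative.)

typedef int v16si __attribute__ ((vector_size (64)));

void
prefetch_rows (const int *base, v16si idx)
{
  /* An all-ones mask prefetches every indexed element.  */
  __builtin_ia32_gatherpfdps ((unsigned short) -1, idx, base, 4, 3);
}
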
+ /* SHA */
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
+ V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
+ V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
+
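(Editorial note, not part of the patch: the SHA builtins work on V4SImode
state and back the _mm_sha* intrinsics in shaintrin.h; sha1rnds4 additionally
takes the round-function selector as an immediate.  A minimal sketch,
assuming -msha; the function name is illustrative.)

typedef int v4si __attribute__ ((vector_size (16)));

v4si
sha1_step (v4si abcd, v4si msg)
{
  /* The immediate picks SHA-1 round function 0 (rounds 0-19).  */
  return __builtin_ia32_sha1rnds4 (abcd, msg, 0);
}
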
/* RTM. */
def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
@@ -31162,12 +32479,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
rtx pat, real_target;
unsigned int i, nargs;
unsigned int nargs_constant = 0;
+ unsigned int mask_pos = 0;
int num_memory = 0;
struct
{
rtx op;
enum machine_mode mode;
- } args[4];
+ } args[6];
bool last_arg_count = false;
enum insn_code icode = d->icode;
const struct insn_data_d *insn_p = &insn_data[icode];
@@ -31187,6 +32505,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
return ix86_expand_sse_round (d, exp, target);
case V4SI_FTYPE_V2DF_V2DF_ROUND:
case V8SI_FTYPE_V4DF_V4DF_ROUND:
+ case V16SI_FTYPE_V8DF_V8DF_ROUND:
return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
case INT_FTYPE_V8SF_V8SF_PTEST:
case INT_FTYPE_V4DI_V4DI_PTEST:
@@ -31265,6 +32584,32 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V8HI:
case V4DI_FTYPE_V4SI:
case V4DI_FTYPE_V2DI:
+ case HI_FTYPE_HI:
+ case UINT_FTYPE_V2DF:
+ case UINT_FTYPE_V4SF:
+ case UINT64_FTYPE_V2DF:
+ case UINT64_FTYPE_V4SF:
+ case V16QI_FTYPE_V8DI:
+ case V16HI_FTYPE_V16SI:
+ case V16SI_FTYPE_HI:
+ case V16SI_FTYPE_V16SI:
+ case V16SI_FTYPE_INT:
+ case V16SF_FTYPE_FLOAT:
+ case V16SF_FTYPE_V4SF:
+ case V16SF_FTYPE_V16SF:
+ case V8HI_FTYPE_V8DI:
+ case V8UHI_FTYPE_V8UHI:
+ case V8SI_FTYPE_V8DI:
+ case V8USI_FTYPE_V8USI:
+ case V8SF_FTYPE_V8DF:
+ case V8DI_FTYPE_QI:
+ case V8DI_FTYPE_INT64:
+ case V8DI_FTYPE_V4DI:
+ case V8DI_FTYPE_V8DI:
+ case V8DF_FTYPE_DOUBLE:
+ case V8DF_FTYPE_V4DF:
+ case V8DF_FTYPE_V8DF:
+ case V8DF_FTYPE_V8SI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -31273,6 +32618,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case FLOAT128_FTYPE_FLOAT128_FLOAT128:
case V16QI_FTYPE_V16QI_V16QI:
case V16QI_FTYPE_V8HI_V8HI:
+ case V16SI_FTYPE_V16SI_V16SI:
+ case V16SF_FTYPE_V16SF_V16SF:
+ case V16SF_FTYPE_V16SF_V16SI:
case V8QI_FTYPE_V8QI_V8QI:
case V8QI_FTYPE_V4HI_V4HI:
case V8HI_FTYPE_V8HI_V8HI:
@@ -31280,6 +32628,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V4SI_V4SI:
case V8SF_FTYPE_V8SF_V8SF:
case V8SF_FTYPE_V8SF_V8SI:
+ case V8DI_FTYPE_V8DI_V8DI:
+ case V8DF_FTYPE_V8DF_V8DF:
+ case V8DF_FTYPE_V8DF_V8DI:
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V8HI_V8HI:
case V4SI_FTYPE_V4SF_V4SF:
@@ -31293,6 +32644,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_V4SI:
case V4SF_FTYPE_V4SF_V2SI:
case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_UINT:
+ case V4SF_FTYPE_V4SF_UINT64:
case V4SF_FTYPE_V4SF_DI:
case V4SF_FTYPE_V4SF_SI:
case V2DI_FTYPE_V2DI_V2DI:
@@ -31309,6 +32662,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DI:
case V2DF_FTYPE_V2DF_DI:
case V2DF_FTYPE_V2DF_SI:
+ case V2DF_FTYPE_V2DF_UINT:
+ case V2DF_FTYPE_V2DF_UINT64:
case V2SF_FTYPE_V2SF_V2SF:
case V1DI_FTYPE_V1DI_V1DI:
case V1DI_FTYPE_V8QI_V8QI:
@@ -31324,6 +32679,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V4DI_V4DI:
case V4DI_FTYPE_V8SI_V8SI:
case V4UDI_FTYPE_V8USI_V8USI:
+ case QI_FTYPE_V8DI_V8DI:
+ case HI_FTYPE_V16SI_V16SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2;
@@ -31361,6 +32718,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UINT_FTYPE_UINT_UCHAR:
case UINT16_FTYPE_UINT16_INT:
case UINT8_FTYPE_UINT8_INT:
+ case HI_FTYPE_HI_HI:
+ case V16SI_FTYPE_V8DF_V8DF:
nargs = 2;
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
@@ -31375,12 +32734,16 @@ ix86_expand_args_builtin (const struct builtin_description *d,
break;
case V8HI_FTYPE_V8HI_INT:
case V8HI_FTYPE_V8SF_INT:
+ case V16HI_FTYPE_V16SF_INT:
case V8HI_FTYPE_V4SF_INT:
case V8SF_FTYPE_V8SF_INT:
+ case V4SF_FTYPE_V16SF_INT:
+ case V16SF_FTYPE_V16SF_INT:
case V4SI_FTYPE_V4SI_INT:
case V4SI_FTYPE_V8SI_INT:
case V4HI_FTYPE_V4HI_INT:
case V4DF_FTYPE_V4DF_INT:
+ case V4DF_FTYPE_V8DF_INT:
case V4SF_FTYPE_V4SF_INT:
case V4SF_FTYPE_V8SF_INT:
case V2DI_FTYPE_V2DI_INT:
@@ -31388,8 +32751,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V4DF_INT:
case V16HI_FTYPE_V16HI_INT:
case V8SI_FTYPE_V8SI_INT:
+ case V16SI_FTYPE_V16SI_INT:
+ case V4SI_FTYPE_V16SI_INT:
case V4DI_FTYPE_V4DI_INT:
case V2DI_FTYPE_V4DI_INT:
+ case V4DI_FTYPE_V8DI_INT:
+ case HI_FTYPE_HI_INT:
nargs = 2;
nargs_constant = 1;
break;
@@ -31399,6 +32766,47 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
case V32QI_FTYPE_V32QI_V32QI_V32QI:
+ case HI_FTYPE_V16SI_V16SI_HI:
+ case QI_FTYPE_V8DI_V8DI_QI:
+ case V16HI_FTYPE_V16SI_V16HI_HI:
+ case V16QI_FTYPE_V16SI_V16QI_HI:
+ case V16QI_FTYPE_V8DI_V16QI_QI:
+ case V16SF_FTYPE_V16SF_V16SF_HI:
+ case V16SF_FTYPE_V16SF_V16SF_V16SF:
+ case V16SF_FTYPE_V16SF_V16SI_V16SF:
+ case V16SF_FTYPE_V16SI_V16SF_HI:
+ case V16SF_FTYPE_V16SI_V16SF_V16SF:
+ case V16SF_FTYPE_V4SF_V16SF_HI:
+ case V16SI_FTYPE_SI_V16SI_HI:
+ case V16SI_FTYPE_V16HI_V16SI_HI:
+ case V16SI_FTYPE_V16QI_V16SI_HI:
+ case V16SI_FTYPE_V16SF_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI:
+ case V16SI_FTYPE_V4SI_V16SI_HI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI:
+ case V8DF_FTYPE_V2DF_V8DF_QI:
+ case V8DF_FTYPE_V4DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_V8DF:
+ case V8DF_FTYPE_V8DF_V8DI_V8DF:
+ case V8DF_FTYPE_V8DI_V8DF_V8DF:
+ case V8DF_FTYPE_V8SF_V8DF_QI:
+ case V8DF_FTYPE_V8SI_V8DF_QI:
+ case V8DI_FTYPE_DI_V8DI_QI:
+ case V8DI_FTYPE_V16QI_V8DI_QI:
+ case V8DI_FTYPE_V2DI_V8DI_QI:
+ case V8DI_FTYPE_V4DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_V8DI:
+ case V8DI_FTYPE_V8HI_V8DI_QI:
+ case V8DI_FTYPE_V8SI_V8DI_QI:
+ case V8HI_FTYPE_V8DI_V8HI_QI:
+ case V8SF_FTYPE_V8DF_V8SF_QI:
+ case V8SI_FTYPE_V8DF_V8SI_QI:
+ case V8SI_FTYPE_V8DI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI:
nargs = 3;
break;
case V32QI_FTYPE_V32QI_V32QI_INT:
@@ -31412,11 +32820,20 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V16SF_FTYPE_V16SF_V16SF_INT:
+ case V16SF_FTYPE_V16SF_V4SF_INT:
+ case V16SI_FTYPE_V16SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
case V4DI_FTYPE_V4DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
+ case QI_FTYPE_V8DI_V8DI_INT:
+ case QI_FTYPE_V8DF_V8DF_INT:
+ case QI_FTYPE_V2DF_V2DF_INT:
+ case QI_FTYPE_V4SF_V4SF_INT:
+ case HI_FTYPE_V16SI_V16SI_INT:
+ case HI_FTYPE_V16SF_V16SF_INT:
nargs = 3;
nargs_constant = 1;
break;
@@ -31439,13 +32856,38 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
+ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
+ case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
+ case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
+ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
+ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
+ case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
+ case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
+ case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
+ nargs = 4;
+ break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
nargs = 4;
nargs_constant = 1;
break;
+ case QI_FTYPE_V2DF_V2DF_INT_QI:
+ case QI_FTYPE_V4SF_V4SF_INT_QI:
+ nargs = 4;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
@@ -31454,6 +32896,51 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
nargs = 4;
break;
+ case QI_FTYPE_V8DI_V8DI_INT_QI:
+ case HI_FTYPE_V16SI_V16SI_INT_HI:
+ case QI_FTYPE_V8DF_V8DF_INT_QI:
+ case HI_FTYPE_V16SF_V16SF_INT_HI:
+ mask_pos = 1;
+ nargs = 4;
+ nargs_constant = 1;
+ break;
+ case V8DF_FTYPE_V8DF_INT_V8DF_QI:
+ case V16SF_FTYPE_V16SF_INT_V16SF_HI:
+ case V16HI_FTYPE_V16SF_INT_V16HI_HI:
+ case V16SI_FTYPE_V16SI_INT_V16SI_HI:
+ case V4SI_FTYPE_V16SI_INT_V4SI_QI:
+ case V4DI_FTYPE_V8DI_INT_V4DI_QI:
+ case V4DF_FTYPE_V8DF_INT_V4DF_QI:
+ case V4SF_FTYPE_V16SF_INT_V4SF_QI:
+ case V8DI_FTYPE_V8DI_INT_V8DI_QI:
+ nargs = 4;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
+ case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
+ nargs = 5;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
+ nargs = 5;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
default:
gcc_unreachable ();
}
@@ -31500,7 +32987,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
op = copy_to_reg (op);
}
}
- else if ((nargs - i) <= nargs_constant)
+ else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
+ || (!mask_pos && (nargs - i) <= nargs_constant))
{
if (!match)
switch (icode)
@@ -31510,6 +32998,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
+ case CODE_FOR_avx512f_cmpv8di3_mask:
+ case CODE_FOR_avx512f_cmpv16si3_mask:
+ case CODE_FOR_avx512f_ucmpv8di3_mask:
+ case CODE_FOR_avx512f_ucmpv16si3_mask:
+ error ("the last argument must be a 3-bit immediate");
+ return const0_rtx;
+
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
@@ -31526,15 +33021,24 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx512f_getmantv8df_mask:
+ case CODE_FOR_avx512f_getmantv16sf_mask:
+ case CODE_FOR_avx512f_getmantv2df_mask:
+ case CODE_FOR_avx512f_getmantv4sf_mask:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sha1rnds4:
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
case CODE_FOR_xop_vpermil2v2df3:
case CODE_FOR_xop_vpermil2v4sf3:
case CODE_FOR_xop_vpermil2v4df3:
case CODE_FOR_xop_vpermil2v8sf3:
+ case CODE_FOR_avx512f_vinsertf32x4_mask:
+ case CODE_FOR_avx512f_vinserti32x4_mask:
+ case CODE_FOR_avx512f_vextractf32x4_mask:
+ case CODE_FOR_avx512f_vextracti32x4_mask:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
@@ -31544,6 +33048,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx_vinsertf128v4df:
case CODE_FOR_avx_vinsertf128v8sf:
case CODE_FOR_avx_vinsertf128v8si:
+ case CODE_FOR_avx512f_vinsertf64x4_mask:
+ case CODE_FOR_avx512f_vinserti64x4_mask:
+ case CODE_FOR_avx512f_vextractf64x4_mask:
+ case CODE_FOR_avx512f_vextracti64x4_mask:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -31553,14 +33061,19 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx_cmpv4sf3:
case CODE_FOR_avx_cmpv4df3:
case CODE_FOR_avx_cmpv8sf3:
+ case CODE_FOR_avx512f_cmpv8df3_mask:
+ case CODE_FOR_avx512f_cmpv16sf3_mask:
+ case CODE_FOR_avx512f_vmcmpv2df3_mask:
+ case CODE_FOR_avx512f_vmcmpv4sf3_mask:
error ("the last argument must be a 5-bit immediate");
return const0_rtx;
- default:
+ default:
switch (nargs_constant)
{
case 2:
- if ((nargs - i) == nargs_constant)
+ if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
+ || (!mask_pos && (nargs - i) == nargs_constant))
{
error ("the next to last argument must be an 8-bit immediate");
break;
@@ -31616,6 +33129,14 @@ ix86_expand_args_builtin (const struct builtin_description *d,
pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
args[2].op, args[3].op);
break;
+ case 5:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op,
+ args[5].op);
+ break;
default:
gcc_unreachable ();
}
@@ -31627,6 +33148,404 @@ ix86_expand_args_builtin (const struct builtin_description *d,
return target;
}
+/* Transform a pattern of the following layout:
+ (parallel [
+ (set (A B))
+ (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
+ into:
+ (set (A B))
+
+ Or:
+ (parallel [ A B
+ ...
+ (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
+ ...
+ ])
+ into:
+ (parallel [ A B ... ]) */
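+
+/* For example, with illustrative register numbers, a pattern such as
+ (parallel [
+ (set (reg:V8DF 100) (plus:V8DF (reg:V8DF 101) (reg:V8DF 102)))
+ (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
+ is rewritten into the bare
+ (set (reg:V8DF 100) (plus:V8DF (reg:V8DF 101) (reg:V8DF 102)))
+ so that the insn pattern without embedded rounding can match.  */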
+
+static rtx
+ix86_erase_embedded_rounding_1 (rtx pat, bool *removed)
+{
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ if (XVECLEN (pat, 0) == 2)
+ {
+ rtx p0 = XVECEXP (pat, 0, 0);
+ rtx p1 = XVECEXP (pat, 0, 1);
+
+ gcc_assert (GET_CODE (p0) == SET
+ && GET_CODE (p1) == UNSPEC
+ && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
+
+ *removed = true;
+ return p0;
+ }
+ else
+ {
+ rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
+ int i = 0;
+ int j = 0;
+
+ *removed = false;
+
+ for (; i < XVECLEN (pat, 0); ++i)
+ {
+ rtx elem = XVECEXP (pat, 0, i);
+ if (GET_CODE (elem) != UNSPEC
+ || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
+ res[j++] = elem;
+ else
+ *removed = true;
+ }
+
+ /* No more than one occurrence was removed.  */
+ gcc_assert (j >= XVECLEN (pat, 0) - 1);
+
+ return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
+ }
+}
+
+/* Walk through an insn sequence or a pattern and erase mentions of
+ embedded rounding.  The main transformation is performed in
+ ix86_erase_embedded_rounding_1.  */
+static rtx
+ix86_erase_embedded_rounding (rtx pat)
+{
+ gcc_assert (GET_CODE (pat) == INSN
+ || GET_CODE (pat) == PARALLEL);
+
+ rtx res;
+ bool was_removed = false;
+
+ if (GET_CODE (pat) == INSN)
+ {
+ rtx insn = pat;
+ start_sequence ();
+ for (; insn; insn = NEXT_INSN (insn))
+ {
+ rtx p = PATTERN (insn);
+ if (GET_CODE (p) == PARALLEL)
+ {
+ bool tmp = false;
+ p = ix86_erase_embedded_rounding_1 (p, &tmp);
+ was_removed |= tmp;
+ }
+ emit_insn (p);
+ }
+ res = get_insns ();
+ end_sequence ();
+ }
+ else
+ res = ix86_erase_embedded_rounding_1 (pat, &was_removed);
+
+ /* Assert that at least one occurrence of rounding was removed.  */
+ gcc_assert (was_removed);
+ return res;
+}
+
+/* Subroutine of ix86_expand_round_builtin to take care of comi insns
+ with rounding. */
+static rtx
+ix86_expand_sse_comi_round (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, set_dst;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ enum insn_code icode = d->icode;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode mode0 = insn_p->operand[0].mode;
+ enum machine_mode mode1 = insn_p->operand[1].mode;
+ enum rtx_code comparison = UNEQ;
+ bool need_ucomi = false;
+
+ /* See avxintrin.h for values. */
+ enum rtx_code comi_comparisons[32] =
+ {
+ UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
+ UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
+ UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
+ };
+ bool need_ucomi_values[32] =
+ {
+ true, false, false, true, true, false, false, true,
+ true, false, false, true, true, false, false, true,
+ false, true, true, false, false, true, true, false,
+ false, true, true, false, false, true, true, false
+ };
+
+ if (!CONST_INT_P (op2))
+ {
+ error ("third argument must be comparison constant.");
+ return const0_rtx;
+ }
+ if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
+ {
+ error ("incorect comparison mode.");
+ return const0_rtx;
+ }
+
+ if (!insn_p->operand[2].predicate (op3, SImode))
+ {
+ error ("incorrect rounding operand.");
+ return const0_rtx;
+ }
+
+ comparison = comi_comparisons[INTVAL (op2)];
+ need_ucomi = need_ucomi_values[INTVAL (op2)];
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_p->operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_p->operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (need_ucomi)
+ icode = icode == CODE_FOR_sse_comi_round
+ ? CODE_FOR_sse_ucomi_round
+ : CODE_FOR_sse2_ucomi_round;
+
+ pat = GEN_FCN (icode) (op0, op1, op3);
+ if (! pat)
+ return 0;
+
+ /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
+ if (INTVAL (op3) == NO_ROUND)
+ {
+ pat = ix86_erase_embedded_rounding (pat);
+ if (! pat)
+ return 0;
+
+ set_dst = SET_DEST (pat);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
+ set_dst = SET_DEST (XVECEXP (pat, 0, 0));
+ }
+
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ set_dst,
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
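+/* For instance, with the avx512fintrin.h wrappers this path is reached
+ through something like
+ int lt = _mm_comi_round_ss (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
+ where the third argument indexes the comparison tables above and the
+ fourth is the rounding/SAE operand.  */
+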
+static rtx
+ix86_expand_round_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ unsigned int i, nargs;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[6];
+ enum insn_code icode = d->icode;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ unsigned int nargs_constant = 0;
+ unsigned int redundant_embed_rnd = 0;
+
+ switch ((enum ix86_builtin_func_type) d->flag)
+ {
+ case UINT64_FTYPE_V2DF_INT:
+ case UINT64_FTYPE_V4SF_INT:
+ case UINT_FTYPE_V2DF_INT:
+ case UINT_FTYPE_V4SF_INT:
+ case INT64_FTYPE_V2DF_INT:
+ case INT64_FTYPE_V4SF_INT:
+ case INT_FTYPE_V2DF_INT:
+ case INT_FTYPE_V4SF_INT:
+ nargs = 2;
+ break;
+ case V4SF_FTYPE_V4SF_UINT_INT:
+ case V4SF_FTYPE_V4SF_UINT64_INT:
+ case V2DF_FTYPE_V2DF_UINT64_INT:
+ case V4SF_FTYPE_V4SF_INT_INT:
+ case V4SF_FTYPE_V4SF_INT64_INT:
+ case V2DF_FTYPE_V2DF_INT64_INT:
+ nargs = 3;
+ break;
+ case V8SF_FTYPE_V8DF_V8SF_QI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_QI_INT:
+ case V8SI_FTYPE_V8DF_V8SI_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_HI_INT:
+ case V16SF_FTYPE_V16SI_V16SF_HI_INT:
+ case V16SI_FTYPE_V16SF_V16SI_HI_INT:
+ case V8DF_FTYPE_V8SF_V8DF_QI_INT:
+ case V16SF_FTYPE_V16HI_V16SF_HI_INT:
+ nargs = 4;
+ break;
+ case INT_FTYPE_V4SF_V4SF_INT_INT:
+ case INT_FTYPE_V2DF_V2DF_INT_INT:
+ return ix86_expand_sse_comi_round (d, exp, target);
+ case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
+ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
+ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
+ nargs = 5;
+ break;
+ case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
+ case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
+ nargs_constant = 4;
+ nargs = 5;
+ break;
+ case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
+ case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
+ case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
+ case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
+ nargs_constant = 3;
+ nargs = 5;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
+ nargs = 6;
+ nargs_constant = 4;
+ break;
+ case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
+ nargs = 6;
+ nargs_constant = 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || !insn_p->operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match = insn_p->operand[i + 1].predicate (op, mode);
+
+ if (i == nargs - nargs_constant)
+ {
+ if (!match)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx512f_getmantv8df_mask_round:
+ case CODE_FOR_avx512f_getmantv16sf_mask_round:
+ case CODE_FOR_avx512f_getmantv2df_mask_round:
+ case CODE_FOR_avx512f_getmantv4sf_mask_round:
+ error ("the immediate argument must be 4-bit immediate.");
+ return const0_rtx;
+ case CODE_FOR_avx512f_cmpv8df3_mask_round:
+ case CODE_FOR_avx512f_cmpv16sf3_mask_round:
+ case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
+ case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
+ error ("the immediate argument must be 5-bit immediate.");
+ return const0_rtx;
+ default:
+ error ("the immediate argument must be 8-bit immediate.");
+ return const0_rtx;
+ }
+ }
+ }
+ else if (i == nargs - 1)
+ {
+ if (!insn_p->operand[nargs].predicate (op, SImode))
+ {
+ error ("incorrect rounding operand.");
+ return const0_rtx;
+ }
+
+ /* If there is no rounding, use the normal version of the pattern.  */
+ if (INTVAL (op) == NO_ROUND)
+ redundant_embed_rnd = 1;
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ {
+ if (optimize || !match)
+ op = copy_to_mode_reg (mode, op);
+ }
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op,
+ args[5].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!pat)
+ return 0;
+
+ if (redundant_embed_rnd)
+ pat = ix86_erase_embedded_rounding (pat);
+
+ emit_insn (pat);
+ return target;
+}
+
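+/* The *_round_* intrinsics funnel into the expander above; e.g.
+ __m512d s = _mm512_add_round_pd (x, y, _MM_FROUND_TO_NEAREST_INT
+ | _MM_FROUND_NO_EXC);
+ passes the rounding mode as the trailing INT operand, while
+ _MM_FROUND_CUR_DIRECTION requests no embedded rounding, in which case
+ the rounding marker is erased and the normal pattern is used.  */
+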
/* Subroutine of ix86_expand_builtin to take care of special insns
with variable number of operands. */
@@ -31679,17 +33598,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
case VOID_FTYPE_PVOID:
+ case V16SI_FTYPE_PV4SI:
+ case V16SF_FTYPE_PV4SF:
+ case V8DI_FTYPE_PV4DI:
+ case V8DF_FTYPE_PV4DF:
nargs = 1;
klass = load;
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV8DI_V8DI:
case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V16SF:
case VOID_FTYPE_PFLOAT_V8SF:
case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V8DF:
case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
case VOID_FTYPE_PLONGLONG_LONGLONG:
@@ -31726,11 +33652,27 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV4DI_V4DI_V4DI:
case VOID_FTYPE_PV4SI_V4SI_V4SI:
case VOID_FTYPE_PV2DI_V2DI_V2DI:
+ case VOID_FTYPE_PV8DF_V8DF_QI:
+ case VOID_FTYPE_PV16SF_V16SF_HI:
+ case VOID_FTYPE_PV8DI_V8DI_QI:
+ case VOID_FTYPE_PV16SI_V16SI_HI:
+ case VOID_FTYPE_PDOUBLE_V2DF_QI:
+ case VOID_FTYPE_PFLOAT_V4SF_QI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case V16SF_FTYPE_PCV16SF_V16SF_HI:
+ case V16SI_FTYPE_PCV16SI_V16SI_HI:
+ case V8DF_FTYPE_PCV8DF_V8DF_QI:
+ case V8DI_FTYPE_PCV8DI_V8DI_QI:
+ case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
+ case V4SF_FTYPE_PCFLOAT_V4SF_QI:
+ nargs = 3;
+ klass = load;
+ memory = 0;
+ break;
case VOID_FTYPE_UINT_UINT_UINT:
case VOID_FTYPE_UINT64_UINT_UINT:
case UCHAR_FTYPE_UINT_UINT_UINT:
@@ -31809,9 +33751,13 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
if (VECTOR_MODE_P (mode))
op = safe_vector_operand (op, mode);
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- op = copy_to_mode_reg (mode, op);
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ op = copy_to_mode_reg (mode, op);
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
}
}
@@ -32516,6 +34462,38 @@ addcarryx:
emit_insn (gen_rtx_SET (VOIDmode, target, pat));
return target;
+ case IX86_BUILTIN_KORTESTC16:
+ icode = CODE_FOR_kortestchi;
+ mode0 = HImode;
+ mode1 = CCCmode;
+ goto kortest;
+
+ case IX86_BUILTIN_KORTESTZ16:
+ icode = CODE_FOR_kortestzhi;
+ mode0 = HImode;
+ mode1 = CCZmode;
+
+ kortest:
+ arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
+
+ target = gen_reg_rtx (QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
+
+ /* Emit kortest. */
+ emit_insn (GEN_FCN (icode) (op0, op1));
+ /* And use setcc to return result from flags. */
+ ix86_expand_setcc (target, EQ,
+ gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
+ return target;
+
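+ /* The kortest cases above surface as, e.g.,
+ if (_mm512_kortestz (m1, m2))
+ which yields 1 iff (m1 | m2) == 0; the QImode result is taken from
+ ZF (kortestz) or CF (kortestc) via setcc.  */
+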
case IX86_BUILTIN_GATHERSIV2DF:
icode = CODE_FOR_avx2_gathersiv2df;
goto gather_gen;
@@ -32576,8 +34554,83 @@ addcarryx:
case IX86_BUILTIN_GATHERALTDIV8SI:
icode = CODE_FOR_avx2_gatherdiv8si;
goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV16SF:
+ icode = CODE_FOR_avx512f_gathersiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8DF:
+ icode = CODE_FOR_avx512f_gathersiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV16SF:
+ icode = CODE_FOR_avx512f_gatherdiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8DF:
+ icode = CODE_FOR_avx512f_gatherdiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV16SI:
+ icode = CODE_FOR_avx512f_gathersiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8DI:
+ icode = CODE_FOR_avx512f_gathersiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV16SI:
+ icode = CODE_FOR_avx512f_gatherdiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8DI:
+ icode = CODE_FOR_avx512f_gatherdiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV8DF:
+ icode = CODE_FOR_avx512f_gathersiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV16SF:
+ icode = CODE_FOR_avx512f_gatherdiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV8DI:
+ icode = CODE_FOR_avx512f_gathersiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV16SI:
+ icode = CODE_FOR_avx512f_gatherdiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_SCATTERSIV16SF:
+ icode = CODE_FOR_avx512f_scattersiv16sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8DF:
+ icode = CODE_FOR_avx512f_scattersiv8df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV16SF:
+ icode = CODE_FOR_avx512f_scatterdiv16sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8DF:
+ icode = CODE_FOR_avx512f_scatterdiv8df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV16SI:
+ icode = CODE_FOR_avx512f_scattersiv16si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8DI:
+ icode = CODE_FOR_avx512f_scattersiv8di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV16SI:
+ icode = CODE_FOR_avx512f_scatterdiv16si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8DI:
+ icode = CODE_FOR_avx512f_scatterdiv8di;
+ goto scatter_gen;
+ case IX86_BUILTIN_GATHERPFDPS:
+ icode = CODE_FOR_avx512pf_gatherpfv16si;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_GATHERPFQPS:
+ icode = CODE_FOR_avx512pf_gatherpfv8di;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFDPS:
+ icode = CODE_FOR_avx512pf_scatterpfv16si;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFQPS:
+ icode = CODE_FOR_avx512pf_scatterpfv8di;
+ goto vec_prefetch_gen;
gather_gen:
+ rtx half;
+ rtx (*gen) (rtx, rtx);
+
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
arg2 = CALL_EXPR_ARG (exp, 2);
@@ -32600,20 +34653,46 @@ addcarryx:
else
subtarget = target;
- if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
- || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
+ switch (fcode)
{
- rtx half = gen_reg_rtx (V4SImode);
+ case IX86_BUILTIN_GATHER3ALTSIV8DF:
+ case IX86_BUILTIN_GATHER3ALTSIV8DI:
+ half = gen_reg_rtx (V8SImode);
+ if (!nonimmediate_operand (op2, V16SImode))
+ op2 = copy_to_mode_reg (V16SImode, op2);
+ emit_insn (gen_vec_extract_lo_v16si (half, op2));
+ op2 = half;
+ break;
+ case IX86_BUILTIN_GATHERALTSIV4DF:
+ case IX86_BUILTIN_GATHERALTSIV4DI:
+ half = gen_reg_rtx (V4SImode);
if (!nonimmediate_operand (op2, V8SImode))
op2 = copy_to_mode_reg (V8SImode, op2);
emit_insn (gen_vec_extract_lo_v8si (half, op2));
op2 = half;
- }
- else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
- || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
- {
- rtx (*gen) (rtx, rtx);
- rtx half = gen_reg_rtx (mode0);
+ break;
+ case IX86_BUILTIN_GATHER3ALTDIV16SF:
+ case IX86_BUILTIN_GATHER3ALTDIV16SI:
+ half = gen_reg_rtx (mode0);
+ if (mode0 == V8SFmode)
+ gen = gen_vec_extract_lo_v16sf;
+ else
+ gen = gen_vec_extract_lo_v16si;
+ if (!nonimmediate_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+ emit_insn (gen (half, op0));
+ op0 = half;
+ if (GET_MODE (op3) != VOIDmode)
+ {
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
+ break;
+ case IX86_BUILTIN_GATHERALTDIV8SF:
+ case IX86_BUILTIN_GATHERALTDIV8SI:
+ half = gen_reg_rtx (mode0);
if (mode0 == V4SFmode)
gen = gen_vec_extract_lo_v8sf;
else
@@ -32622,10 +34701,16 @@ addcarryx:
op0 = copy_to_mode_reg (GET_MODE (op0), op0);
emit_insn (gen (half, op0));
op0 = half;
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- emit_insn (gen (half, op3));
- op3 = half;
+ if (GET_MODE (op3) != VOIDmode)
+ {
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
+ break;
+ default:
+ break;
}
/* Force memory operand only with base register here. But we
@@ -32639,8 +34724,16 @@ addcarryx:
op1 = copy_to_mode_reg (Pmode, op1);
if (!insn_data[icode].operand[3].predicate (op2, mode2))
op2 = copy_to_mode_reg (mode2, op2);
- if (!insn_data[icode].operand[4].predicate (op3, mode3))
- op3 = copy_to_mode_reg (mode3, op3);
+ if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
+ {
+ if (!insn_data[icode].operand[4].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+ }
+ else
+ {
+ op3 = copy_to_reg (op3);
+ op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
+ }
if (!insn_data[icode].operand[5].predicate (op4, mode4))
{
error ("last argument must be scale 1, 2, 4, 8");
@@ -32653,7 +34746,12 @@ addcarryx:
previous contents. */
if (optimize)
{
- if (TREE_CODE (arg3) == VECTOR_CST)
+ if (TREE_CODE (arg3) == INTEGER_CST)
+ {
+ if (integer_all_onesp (arg3))
+ op0 = pc_rtx;
+ }
+ else if (TREE_CODE (arg3) == VECTOR_CST)
{
unsigned int negative = 0;
for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
@@ -32669,7 +34767,8 @@ addcarryx:
if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
op0 = pc_rtx;
}
- else if (TREE_CODE (arg3) == SSA_NAME)
+ else if (TREE_CODE (arg3) == SSA_NAME
+ && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
{
/* Recognize also when mask is like:
__v2df src = _mm_setzero_pd ();
@@ -32714,22 +34813,150 @@ addcarryx:
return const0_rtx;
emit_insn (pat);
- if (fcode == IX86_BUILTIN_GATHERDIV8SF
- || fcode == IX86_BUILTIN_GATHERDIV8SI)
+ switch (fcode)
{
- enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
- ? V4SFmode : V4SImode;
+ case IX86_BUILTIN_GATHER3DIV16SF:
if (target == NULL_RTX)
- target = gen_reg_rtx (tmode);
- if (tmode == V4SFmode)
- emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
- else
- emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ target = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHER3DIV16SI:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V8SImode);
+ emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHERDIV8SF:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHERDIV8SI:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ break;
+ default:
+ target = subtarget;
+ break;
+ }
+ return target;
+
+ scatter_gen:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ /* Force memory operand only with base register here. But we
+ don't want to do it on memory operand for other builtin
+ functions. */
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
+ if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
+ {
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
}
else
- target = subtarget;
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
+ }
- return target;
+ if (!insn_data[icode].operand[2].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+
+ if (!insn_data[icode].operand[3].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+
+ if (!insn_data[icode].operand[4].predicate (op4, mode4))
+ {
+ error ("last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+
+ emit_insn (pat);
+ return 0;
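+
+ /* A typical entry into scatter_gen is, e.g.,
+ _mm512_i32scatter_ps (ptr, idx, vals, 4);
+ which reaches __builtin_ia32_scattersiv16sf with an all-ones
+ mask.  */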
+
+ vec_prefetch_gen:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ if (GET_MODE (op0) == mode0
+ || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
+ {
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ }
+ else if (op0 != constm1_rtx)
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
+ }
+
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* Force memory operand only with base register here. But we
+ don't want to do it on memory operand for other builtin
+ functions. */
+ if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+ op2 = force_reg (Pmode, op2);
+
+ if (!insn_data[icode].operand[2].predicate (op2, Pmode))
+ op2 = copy_to_mode_reg (Pmode, op2);
+
+ if (!insn_data[icode].operand[3].predicate (op3, mode3))
+ {
+ error ("forth argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+
+ if (!insn_data[icode].operand[4].predicate (op4, mode4))
+ {
+ error ("last argument must be hint 0 or 1");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+
+ emit_insn (pat);
+
+ return 0;
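+
+ /* vec_prefetch_gen is reached from the avx512pfintrin.h wrappers,
+ e.g. something like
+ _mm512_prefetch_i32gather_ps (idx, ptr, 4, 1);
+ with scale 1/2/4/8 and hint 0 or 1 validated above.  */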
case IX86_BUILTIN_XABORT:
icode = CODE_FOR_xabort;
@@ -32773,6 +35000,10 @@ addcarryx:
if (d->code == fcode)
return ix86_expand_sse_comi (d, exp, target);
+ for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_round_builtin (d, exp, target);
+
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
i++, d++)
@@ -32825,6 +35056,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPD];
else if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD512];
}
break;
@@ -32835,6 +35068,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
else if (out_n == 8 && in_n == 8)
return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+ else if (out_n == 16 && in_n == 16)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR512];
}
break;
@@ -32851,6 +35086,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512];
}
break;
@@ -32883,6 +35120,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512];
}
break;
@@ -32939,6 +35178,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512];
}
break;
@@ -32965,6 +35206,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
else if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD512];
}
break;
@@ -32975,6 +35218,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
else if (out_n == 8 && in_n == 8)
return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+ else if (out_n == 16 && in_n == 16)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS512];
}
break;
@@ -33392,24 +35637,48 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
case V4DFmode:
code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
+ else
+ return NULL_TREE;
+ break;
case V2DImode:
code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
break;
case V4DImode:
code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
+ else
+ return NULL_TREE;
+ break;
case V4SFmode:
code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
break;
case V8SFmode:
code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
+ else
+ return NULL_TREE;
+ break;
case V4SImode:
code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
break;
case V8SImode:
code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
+ else
+ return NULL_TREE;
+ break;
default:
return NULL_TREE;
}
@@ -33465,7 +35734,7 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
unsigned i, nelt = GET_MODE_NUNITS (mode);
unsigned mask = 0;
- unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
+ unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
if (XVECLEN (par, 0) != (int) nelt)
return 0;
@@ -33488,6 +35757,24 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
switch (mode)
{
+ case V8DFmode:
+ /* In the 512-bit DFmode case, we can only move elements within
+ a 128-bit lane. First fill the second part of the mask,
+ then fallthru. */
+ for (i = 4; i < 6; ++i)
+ {
+ if (ipar[i] < 4 || ipar[i] >= 6)
+ return 0;
+ mask |= (ipar[i] - 4) << i;
+ }
+ for (i = 6; i < 8; ++i)
+ {
+ if (ipar[i] < 6)
+ return 0;
+ mask |= (ipar[i] - 6) << i;
+ }
+ /* FALLTHRU */
+
case V4DFmode:
/* In the 256-bit DFmode case, we can only move elements within
a 128-bit lane. */
@@ -33505,10 +35792,18 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
}
break;
+ case V16SFmode:
+ /* In the 512-bit SFmode case, the permutation in the upper 256 bits
+ must mirror the permutation in the lower 256 bits.  */
+ for (i = 0; i < 8; ++i)
+ if (ipar[i] + 8 != ipar[i + 8])
+ return 0;
+ /* FALLTHRU */
+
case V8SFmode:
- /* In the 256-bit SFmode case, we have full freedom of movement
- within the low 128-bit lane, but the high 128-bit lane must
- mirror the exact same pattern. */
+ /* In the 256-bit SFmode case, we have full freedom of movement
+ within the low 128-bit lane, but the high 128-bit lane must
+ mirror the exact same pattern.  */
for (i = 0; i < 4; ++i)
if (ipar[i] + 4 != ipar[i + 4])
return 0;
@@ -33729,10 +36024,12 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
return regclass;
/* Force constants into memory if we are loading a (nonzero) constant into
- an MMX or SSE register. This is because there are no MMX/SSE instructions
- to load from a constant. */
+ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
+ instructions to load from a constant. */
if (CONSTANT_P (x)
- && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ && (MAYBE_MMX_CLASS_P (regclass)
+ || MAYBE_SSE_CLASS_P (regclass)
+ || MAYBE_MASK_CLASS_P (regclass)))
return NO_REGS;
/* Prefer SSE regs only, if we can use them for math. */
@@ -33798,7 +36095,7 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
alternative: if reload cannot do this, it will still use its choice. */
mode = GET_MODE (x);
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
- return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
+ return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
if (X87_FLOAT_MODE_P (mode))
{
@@ -34035,6 +36332,9 @@ ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
if (MAYBE_FLOAT_CLASS_P (regclass))
return true;
+ if (MAYBE_MASK_CLASS_P (regclass))
+ return true;
+
if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
{
/* Vector registers do not support QI or HImode loads. If we don't
@@ -34265,18 +36565,26 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return false;
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
+ if (MASK_REGNO_P (regno))
+ return VALID_MASK_REG_MODE (mode);
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. OImode move is available only when AVX is
- enabled. */
- return ((TARGET_AVX && mode == OImode)
- || VALID_AVX256_REG_MODE (mode)
- || VALID_SSE_REG_MODE (mode)
- || VALID_SSE2_REG_MODE (mode)
- || VALID_MMX_REG_MODE (mode)
- || VALID_MMX_REG_MODE_3DNOW (mode));
+ are available. In xmm16-xmm31 we can store only 512 bit
+ modes. OImode move is available only when AVX is enabled.
+ XImode is available only when AVX512F is enabled. */
+ return ((TARGET_AVX512F
+ && (mode == XImode
+ || VALID_AVX512F_REG_MODE (mode)
+ || VALID_AVX512F_SCALAR_MODE (mode)))
+ || (!EXT_REX_SSE_REGNO_P (regno)
+ && ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode))));
}
if (MMX_REGNO_P (regno))
{
@@ -34422,7 +36730,8 @@ ix86_set_reg_reg_cost (enum machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+ || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
|| (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
@@ -34442,6 +36751,7 @@ static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
bool speed)
{
+ rtx mask;
enum rtx_code code = (enum rtx_code) code_i;
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
@@ -34625,7 +36935,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
rtx sub;
gcc_assert (FLOAT_MODE_P (mode));
- gcc_assert (TARGET_FMA || TARGET_FMA4);
+ gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
/* ??? SSE scalar/vector cost should be used here. */
/* ??? Bald assumption that fma has the same cost as fmul. */
@@ -34918,13 +37228,21 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
case VEC_SELECT:
case VEC_CONCAT:
- case VEC_MERGE:
case VEC_DUPLICATE:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
same cost. */
*total = cost->fabs;
return true;
+ case VEC_MERGE:
+ mask = XEXP (x, 2);
+ /* This is a masked instruction; assume the same cost as the
+ non-masked variant.  */
+ if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
+ *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
+ else
+ *total = cost->fabs;
+ return true;
default:
return false;
@@ -35054,6 +37372,14 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
+ /* Extended REX SSE registers. */
+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* Mask registers.  */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
@@ -36051,9 +38377,9 @@ x86_emit_floatuns (rtx operands[2])
emit_label (donelab);
}
-/* AVX2 does support 32-byte integer vector operations,
- thus the longest vector we are faced with is V32QImode. */
-#define MAX_VECT_LEN 32
+/* AVX512F supports 64-byte integer vector operations, so the longest
+ vector we are faced with is V64QImode.  */
+#define MAX_VECT_LEN 64
struct expand_vec_perm_d
{
@@ -36082,6 +38408,36 @@ get_mode_wider_vector (enum machine_mode o)
return n;
}
+/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
+ fill TARGET with VAL via vec_duplicate.  */
+
+static bool
+ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
+{
+ bool ok;
+ rtx insn, dup;
+
+ /* First attempt to recognize VAL as-is. */
+ dup = gen_rtx_VEC_DUPLICATE (mode, val);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
+ if (recog_memoized (insn) < 0)
+ {
+ rtx seq;
+ /* If that fails, force VAL into a register. */
+
+ start_sequence ();
+ XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
+ seq = get_insns ();
+ end_sequence ();
+ if (seq)
+ emit_insn_before (seq, insn);
+
+ ok = recog_memoized (insn) >= 0;
+ gcc_assert (ok);
+ }
+ return true;
+}
+
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
with all elements equal to VAR. Return true if successful. */
@@ -36107,29 +38463,11 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V2DImode:
case V4SFmode:
case V4SImode:
- {
- rtx insn, dup;
-
- /* First attempt to recognize VAL as-is. */
- dup = gen_rtx_VEC_DUPLICATE (mode, val);
- insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
- if (recog_memoized (insn) < 0)
- {
- rtx seq;
- /* If that fails, force VAL into a register. */
-
- start_sequence ();
- XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
- seq = get_insns ();
- end_sequence ();
- if (seq)
- emit_insn_before (seq, insn);
-
- ok = recog_memoized (insn) >= 0;
- gcc_assert (ok);
- }
- }
- return true;
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
+ return ix86_vector_duplicate_value (mode, target, val);
case V4HImode:
if (!mmx_ok)
@@ -36477,8 +38815,8 @@ static void
ix86_expand_vector_init_concat (enum machine_mode mode,
rtx target, rtx *ops, int n)
{
- enum machine_mode cmode, hmode = VOIDmode;
- rtx first[8], second[4];
+ enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
+ rtx first[16], second[8], third[4];
rtvec v;
int i, j;
@@ -36487,6 +38825,18 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
case 2:
switch (mode)
{
+ case V16SImode:
+ cmode = V8SImode;
+ break;
+ case V16SFmode:
+ cmode = V8SFmode;
+ break;
+ case V8DImode:
+ cmode = V4DImode;
+ break;
+ case V8DFmode:
+ cmode = V4DFmode;
+ break;
case V8SImode:
cmode = V4SImode;
break;
@@ -36553,6 +38903,14 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
case 8:
switch (mode)
{
+ case V8DImode:
+ cmode = V2DImode;
+ hmode = V4DImode;
+ break;
+ case V8DFmode:
+ cmode = V2DFmode;
+ hmode = V4DFmode;
+ break;
case V8SImode:
cmode = V2SImode;
hmode = V4SImode;
@@ -36566,6 +38924,24 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
}
goto half;
+ case 16:
+ switch (mode)
+ {
+ case V16SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ gmode = V8SImode;
+ break;
+ case V16SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ gmode = V8SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
@@ -36579,7 +38955,27 @@ half:
}
n >>= 1;
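+ /* For n == 16 the pieces are combined in three rounds: pairs of
+ 2-element CMODE registers into HMODE halves, those into GMODE
+ quarters, and finally two GMODE registers into the 512-bit
+ target.  */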
- if (n > 2)
+ if (n > 4)
+ {
+ gcc_assert (hmode != VOIDmode);
+ gcc_assert (gmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ third[j] = gen_reg_rtx (gmode);
+ ix86_expand_vector_init_concat (gmode, third[j],
+ &second[i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, third, n);
+ }
+ else if (n > 2)
{
gcc_assert (hmode != VOIDmode);
for (i = j = 0; i < n; i += 2, j++)
@@ -36722,7 +39118,7 @@ static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[32], op0, op1;
+ rtx ops[64], op0, op1;
enum machine_mode half_mode = VOIDmode;
int n, i;
@@ -36734,6 +39130,10 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
break;
/* FALLTHRU */
+ case V16SImode:
+ case V16SFmode:
+ case V8DFmode:
+ case V8DImode:
case V8SFmode:
case V8SImode:
case V4DFmode:
@@ -37358,6 +39758,42 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
break;
+ case V16SFmode:
+ tmp = gen_reg_rtx (V8SFmode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+
+ case V8DFmode:
+ tmp = gen_reg_rtx (V4DFmode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+
+ case V16SImode:
+ tmp = gen_reg_rtx (V8SImode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+
+ case V8DImode:
+ tmp = gen_reg_rtx (V4DImode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+
case V8QImode:
/* ??? Could extract the appropriate HImode element and shift. */
default:
@@ -37445,6 +39881,44 @@ emit_reduc_half (rtx dest, rtx src, int i)
gen_lowpart (V2TImode, src),
GEN_INT (i / 2));
break;
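+ /* I is the width in bits of the group whose upper half is moved
+ down: use the cross-lane shuf_i32x4 when I is 512 or 256 and the
+ in-lane pshufd otherwise; both act on the V16SI view of SRC.  */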
+ case V16SImode:
+ case V16SFmode:
+ case V8DImode:
+ case V8DFmode:
+ if (i > 128)
+ tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
+ gen_lowpart (V16SImode, src),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (0x4 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x5 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x6 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x7 + (i == 512 ? 4 : 0)),
+ GEN_INT (0xC), GEN_INT (0xD),
+ GEN_INT (0xE), GEN_INT (0xF),
+ GEN_INT (0x10), GEN_INT (0x11),
+ GEN_INT (0x12), GEN_INT (0x13),
+ GEN_INT (0x14), GEN_INT (0x15),
+ GEN_INT (0x16), GEN_INT (0x17));
+ else
+ tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (i == 128 ? 0x2 : 0x1),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (i == 128 ? 0x6 : 0x5),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (i == 128 ? 0xA : 0x9),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (i == 128 ? 0xE : 0xD),
+ GEN_INT (0xF),
+ GEN_INT (0xF),
+ GEN_INT (0xF));
+ break;
default:
gcc_unreachable ();
}
@@ -37507,6 +39981,8 @@ ix86_vector_mode_supported_p (enum machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+ return true;
if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
@@ -37820,9 +40296,15 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
- UNSPEC_RCP)));
+ if (mode == V16SFmode || mode == V8DFmode)
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP14)));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP)));
+
/* e0 = x0 * b */
emit_insn (gen_rtx_SET (VOIDmode, e0,
gen_rtx_MULT (mode, x0, b)));
@@ -37852,6 +40334,7 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
{
rtx x0, e0, e1, e2, e3, mthree, mhalf;
REAL_VALUE_TYPE r;
+ int unspec;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
@@ -37864,11 +40347,15 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+ unspec = UNSPEC_RSQRT;
if (VECTOR_MODE_P (mode))
{
mthree = ix86_build_const_vector (mode, true, mthree);
mhalf = ix86_build_const_vector (mode, true, mhalf);
+ /* There is no 512-bit rsqrt.  There is, however, rsqrt14.  */
+ if (GET_MODE_SIZE (mode) == 64)
+ unspec = UNSPEC_RSQRT14;
}
/* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
@@ -37879,7 +40366,7 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
/* x0 = rsqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
- UNSPEC_RSQRT)));
+ unspec)));
/* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
if (!recip)
@@ -37890,11 +40377,23 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
mask = gen_reg_rtx (mode);
zero = force_reg (mode, CONST0_RTX(mode));
- emit_insn (gen_rtx_SET (VOIDmode, mask,
- gen_rtx_NE (mode, zero, a)));
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_AND (mode, x0, mask)));
+ /* Handle masked compare. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
+ {
+ mask = gen_reg_rtx (HImode);
+ /* Imm value 0x4 corresponds to not-equal comparison. */
+ emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
+ emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
}
/* e0 = x0 * a */
@@ -40928,6 +43427,14 @@ ix86_expand_vec_perm_const (rtx operands[4])
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
+ /* vpermi2 takes a vector; there is no need to disassemble it.  */
+ if (d.vmode == V16SFmode || d.vmode == V16SImode
+ || d.vmode == V8DFmode || d.vmode == V8DImode)
+ {
+ ix86_expand_vec_perm_vpermi2 (d.target, d.op0, sel, d.op1);
+ return true;
+ }
+
for (i = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
@@ -40971,6 +43478,10 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
+ if (d.vmode == V16SImode || d.vmode == V16SFmode
+ || d.vmode == V8DFmode || d.vmode == V8DImode)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
if (GET_MODE_SIZE (d.vmode) == 16)
{
/* All implementable with a single vpperm insn. */
@@ -41038,6 +43549,19 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
d.perm[i] = i * 2 + odd;
/* We'll either be able to implement the permutation directly... */
+ if (d.vmode == V16SFmode || d.vmode == V16SImode
+ || d.vmode == V8DImode || d.vmode == V8DFmode)
+ {
+ rtx vec[64];
+ for (i = 0; i < nelt; ++i)
+ vec[i] = GEN_INT (d.perm[i]);
+ rtx mask = gen_rtx_CONST_VECTOR (d.vmode, gen_rtvec_v (nelt, vec));
+ rtx operands[4] = {targ, op0, op1, mask};
+ if (ix86_expand_vec_perm_const (operands))
+ return;
+ else
+ gcc_unreachable ();
+ }
if (expand_vec_perm_1 (&d))
return;
@@ -41213,7 +43737,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
op2 = force_reg (mode, op2);
/* We only play even/odd games with vectors of SImode. */
- gcc_assert (mode == V4SImode || mode == V8SImode);
+ gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
/* If we're looking for the odd results, shift those members down to
the even slots. For some cpus this is faster than a PSHUFD. */
@@ -41239,7 +43763,14 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
op2 = gen_lowpart (mode, op2);
}
- if (mode == V8SImode)
+ if (mode == V16SImode)
+ {
+ if (uns_p)
+ x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
+ else
+ x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
+ }
+ else if (mode == V8SImode)
{
if (uns_p)
x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
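For reference, vec_widen_smult_even_v16si multiplies the even 32-bit lanes into eight 64-bit products (EVEX vpmuldq does this in one instruction); a scalar sketch, with illustrative array dimensions:

#include <stdint.h>
#include <stdio.h>

/* Scalar model of the even widening multiply: lanes 0, 2, 4, ...
   of two 16-lane vectors produce eight 64-bit results.  */
static void
widen_smult_even (const int32_t a[16], const int32_t b[16],
                  int64_t out[8])
{
  for (int i = 0; i < 8; i++)
    out[i] = (int64_t) a[2 * i] * (int64_t) b[2 * i];
}

int
main (void)
{
  int32_t a[16], b[16];
  int64_t out[8];
  for (int i = 0; i < 16; i++)
    {
      a[i] = 100000 + i;
      b[i] = 200000 + i;
    }
  widen_smult_even (a, b, out);
  printf ("%lld\n", (long long) out[0]); /* 100000 * 200000 */
  return 0;
}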
@@ -41455,6 +43986,11 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
umul = gen_vec_widen_umult_even_v8si;
nmode = V8SImode;
}
+ else if (mode == V8DImode)
+ {
+ umul = gen_vec_widen_umult_even_v16si;
+ nmode = V16SImode;
+ }
else
gcc_unreachable ();
@@ -42546,12 +45082,16 @@ ix86_preferred_simd_mode (enum machine_mode mode)
case HImode:
return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
case SImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
+ return TARGET_AVX512F ? V16SImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
case DImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
+ return TARGET_AVX512F ? V8DImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
case SFmode:
- if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ if (TARGET_AVX512F)
+ return V16SFmode;
+ else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
else
return V4SFmode;
@@ -42559,6 +45099,8 @@ ix86_preferred_simd_mode (enum machine_mode mode)
case DFmode:
if (!TARGET_VECTORIZE_DOUBLE)
return word_mode;
+ else if (TARGET_AVX512F)
+ return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
else if (TARGET_SSE2)
@@ -42571,12 +45113,14 @@ ix86_preferred_simd_mode (enum machine_mode mode)
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
- vectors. */
+ vectors. If AVX512F is enabled then try vectorizing with 512bit,
+ 256bit and 128bit vectors. */
static unsigned int
ix86_autovectorize_vector_sizes (void)
{
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
+ return TARGET_AVX512F ? 64 | 32 | 16 :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
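Taken together with ix86_preferred_simd_mode, the vectorizer now considers 64-, 32- and 16-byte vectors under -mavx512f. A compile-and-inspect sketch (illustration only, not part of the patch): with -O3 -mavx512f the loop below is vectorized in V16SImode, sixteen ints per zmm operation, where plain -mavx2 would pick V8SImode.

void
add_arrays (int *restrict c, const int *restrict a,
            const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    c[i] = a[i] + b[i];
}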
@@ -42590,7 +45134,7 @@ ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
&& (mode == SImode || (TARGET_64BIT && mode == DImode))
&& INTEGER_CLASS_P (rclass))
- return SSE_REGS;
+ return ALL_SSE_REGS;
return NO_REGS;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7d940f98804..d7a934df1ba 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -51,6 +51,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_SSE4_2 TARGET_ISA_SSE4_2
#define TARGET_AVX TARGET_ISA_AVX
#define TARGET_AVX2 TARGET_ISA_AVX2
+#define TARGET_AVX512F TARGET_ISA_AVX512F
+#define TARGET_AVX512PF TARGET_ISA_AVX512PF
+#define TARGET_AVX512ER TARGET_ISA_AVX512ER
+#define TARGET_AVX512CD TARGET_ISA_AVX512CD
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_SSE4A TARGET_ISA_SSE4A
#define TARGET_FMA4 TARGET_ISA_FMA4
@@ -67,6 +71,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_MOVBE TARGET_ISA_MOVBE
#define TARGET_CRC32 TARGET_ISA_CRC32
#define TARGET_AES TARGET_ISA_AES
+#define TARGET_SHA TARGET_ISA_SHA
#define TARGET_PCLMUL TARGET_ISA_PCLMUL
#define TARGET_CMPXCHG16B TARGET_ISA_CX16
#define TARGET_FSGSBASE TARGET_ISA_FSGSBASE
@@ -802,7 +807,8 @@ enum target_cpu_default
Pentium+ prefers DFmode values to be aligned to 64 bit boundary
and Pentium Pro XFmode values at 128 bit boundaries. */
-#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128)
+#define BIGGEST_ALIGNMENT \
+ (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128))
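One user-visible consequence (illustration, not from the patch): an empty aligned attribute requests BIGGEST_ALIGNMENT, so it now yields a 64-byte boundary when AVX-512F is enabled, rather than 32 bytes under plain AVX.

#include <stdio.h>

struct blob { char c[100]; } __attribute__ ((aligned)) b;

int
main (void)
{
  printf ("%zu\n", (size_t) __alignof__ (b)); /* 64 with -mavx512f */
  return 0;
}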
/* Maximum stack alignment. */
#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
@@ -958,7 +964,7 @@ enum target_cpu_default
eliminated during reloading in favor of either the stack or frame
pointer. */
-#define FIRST_PSEUDO_REGISTER 53
+#define FIRST_PSEUDO_REGISTER 77
/* Number of hardware registers that go into the DWARF-2 unwind info.
If not defined, equals FIRST_PSEUDO_REGISTER. */
@@ -984,7 +990,13 @@ enum target_cpu_default
/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
0, 0, 0, 0, 0, 0, 0, 0, \
/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
- 0, 0, 0, 0, 0, 0, 0, 0 }
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
@@ -1012,7 +1024,13 @@ enum target_cpu_default
/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
1, 1, 1, 1, 2, 2, 2, 2, \
/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
- 6, 6, 6, 6, 6, 6, 6, 6 }
+ 6, 6, 6, 6, 6, 6, 6, 6, \
+/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
+ 6, 6, 6, 6, 6, 6, 6, 6, \
+/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
+ 6, 6, 6, 6, 6, 6, 6, 6, \
+ /* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
@@ -1027,7 +1045,8 @@ enum target_cpu_default
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
- 48, 49, 50, 51, 52 }
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76 }
/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
to be rearranged based on a particular function. When using sse math,
@@ -1073,6 +1092,14 @@ enum target_cpu_default
#define VALID_AVX256_REG_OR_OI_MODE(MODE) \
(VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode)
+#define VALID_AVX512F_SCALAR_MODE(MODE) \
+ ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \
+ || (MODE) == SFmode)
+
+#define VALID_AVX512F_REG_MODE(MODE) \
+ ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
+ || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode)
+
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V2DImode || (MODE) == DFmode)
@@ -1112,7 +1139,11 @@ enum target_cpu_default
|| (MODE) == V2DImode || (MODE) == V4SFmode || (MODE) == V4SImode \
|| (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
|| (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
- || (MODE) == V2TImode)
+ || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \
+ || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
+ || (MODE) == V16SFmode)
+
+#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
@@ -1175,15 +1206,21 @@ enum target_cpu_default
#define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1)
#define LAST_SSE_REG (FIRST_SSE_REG + 7)
-#define FIRST_MMX_REG (LAST_SSE_REG + 1)
+#define FIRST_MMX_REG (LAST_SSE_REG + 1) /*29*/
#define LAST_MMX_REG (FIRST_MMX_REG + 7)
-#define FIRST_REX_INT_REG (LAST_MMX_REG + 1)
+#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) /*37*/
#define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7)
-#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1)
+#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) /*45*/
#define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7)
+#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/
+#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/
+
+#define FIRST_MASK_REG (LAST_EXT_REX_SSE_REG + 1) /*69*/
+#define LAST_MASK_REG (FIRST_MASK_REG + 7) /*76*/
+
/* Override this in other tm.h files to cope with various OS lossage
requiring a frame pointer. */
#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
@@ -1263,6 +1300,8 @@ enum reg_class
FLOAT_REGS,
SSE_FIRST_REG,
SSE_REGS,
+ EVEX_SSE_REGS,
+ ALL_SSE_REGS,
MMX_REGS,
FP_TOP_SSE_REGS,
FP_SECOND_SSE_REGS,
@@ -1270,6 +1309,8 @@ enum reg_class
FLOAT_INT_REGS,
INT_SSE_REGS,
FLOAT_INT_SSE_REGS,
+ MASK_EVEX_REGS,
+ MASK_REGS,
ALL_REGS, LIM_REG_CLASSES
};
@@ -1280,7 +1321,7 @@ enum reg_class
#define FLOAT_CLASS_P(CLASS) \
reg_class_subset_p ((CLASS), FLOAT_REGS)
#define SSE_CLASS_P(CLASS) \
- reg_class_subset_p ((CLASS), SSE_REGS)
+ reg_class_subset_p ((CLASS), ALL_SSE_REGS)
#define MMX_CLASS_P(CLASS) \
((CLASS) == MMX_REGS)
#define MAYBE_INTEGER_CLASS_P(CLASS) \
@@ -1288,9 +1329,11 @@ enum reg_class
#define MAYBE_FLOAT_CLASS_P(CLASS) \
reg_classes_intersect_p ((CLASS), FLOAT_REGS)
#define MAYBE_SSE_CLASS_P(CLASS) \
- reg_classes_intersect_p (SSE_REGS, (CLASS))
+ reg_classes_intersect_p (ALL_SSE_REGS, (CLASS))
#define MAYBE_MMX_CLASS_P(CLASS) \
reg_classes_intersect_p (MMX_REGS, (CLASS))
+#define MAYBE_MASK_CLASS_P(CLASS) \
+ reg_classes_intersect_p (MASK_REGS, (CLASS))
#define Q_CLASS_P(CLASS) \
reg_class_subset_p ((CLASS), Q_REGS)
@@ -1311,6 +1354,8 @@ enum reg_class
"FLOAT_REGS", \
"SSE_FIRST_REG", \
"SSE_REGS", \
+ "EVEX_SSE_REGS", \
+ "ALL_SSE_REGS", \
"MMX_REGS", \
"FP_TOP_SSE_REGS", \
"FP_SECOND_SSE_REGS", \
@@ -1318,6 +1363,8 @@ enum reg_class
"FLOAT_INT_REGS", \
"INT_SSE_REGS", \
"FLOAT_INT_SSE_REGS", \
+ "MASK_EVEX_REGS", \
+ "MASK_REGS", \
"ALL_REGS" }
/* Define which registers fit in which classes. This is an initializer
@@ -1326,30 +1373,38 @@ enum reg_class
Note that CLOBBERED_REGS are calculated by
TARGET_CONDITIONAL_REGISTER_USAGE. */
-#define REG_CLASS_CONTENTS \
-{ { 0x00, 0x0 }, \
- { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \
- { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \
- { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \
- { 0x03, 0x0 }, /* AD_REGS */ \
- { 0x0f, 0x0 }, /* Q_REGS */ \
- { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \
- { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \
- { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \
- { 0x00, 0x0 }, /* CLOBBERED_REGS */ \
- { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
- { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
- { 0xff00, 0x0 }, /* FLOAT_REGS */ \
- { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \
-{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \
-{ 0xe0000000, 0x1f }, /* MMX_REGS */ \
-{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \
-{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \
-{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \
- { 0x11ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \
-{ 0x1ff100ff,0x1fffe0 }, /* INT_SSE_REGS */ \
-{ 0x1ff1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \
-{ 0xffffffff,0x1fffff } \
+#define REG_CLASS_CONTENTS \
+{ { 0x00, 0x0, 0x0 }, \
+ { 0x01, 0x0, 0x0 }, /* AREG */ \
+ { 0x02, 0x0, 0x0 }, /* DREG */ \
+ { 0x04, 0x0, 0x0 }, /* CREG */ \
+ { 0x08, 0x0, 0x0 }, /* BREG */ \
+ { 0x10, 0x0, 0x0 }, /* SIREG */ \
+ { 0x20, 0x0, 0x0 }, /* DIREG */ \
+ { 0x03, 0x0, 0x0 }, /* AD_REGS */ \
+ { 0x0f, 0x0, 0x0 }, /* Q_REGS */ \
+ { 0x1100f0, 0x1fe0, 0x0 }, /* NON_Q_REGS */ \
+ { 0x7f, 0x1fe0, 0x0 }, /* INDEX_REGS */ \
+ { 0x1100ff, 0x0, 0x0 }, /* LEGACY_REGS */ \
+ { 0x07, 0x0, 0x0 }, /* CLOBBERED_REGS */ \
+ { 0x1100ff, 0x1fe0, 0x0 }, /* GENERAL_REGS */ \
+ { 0x100, 0x0, 0x0 }, /* FP_TOP_REG */ \
+ { 0x0200, 0x0, 0x0 }, /* FP_SECOND_REG */ \
+ { 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \
+ { 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \
+{ 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \
+ { 0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \
+{ 0x1fe00000,0xffffe000, 0x1f }, /* ALL_SSE_REGS */ \
+{ 0xe0000000, 0x1f, 0x0 }, /* MMX_REGS */ \
+{ 0x1fe00100,0xffffe000, 0x1f }, /* FP_TOP_SSE_REG */ \
+{ 0x1fe00200,0xffffe000, 0x1f }, /* FP_SECOND_SSE_REG */ \
+{ 0x1fe0ff00,0xffffe000, 0x1f }, /* FLOAT_SSE_REGS */ \
+{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \
+{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \
+{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \
+ { 0x0, 0x0,0x1fc0 }, /* MASK_EVEX_REGS */ \
+ { 0x0, 0x0,0x1fe0 }, /* MASK_REGS */ \
+{ 0xffffffff,0xffffffff,0x1fff } \
}
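A quick way to read the new three-word entries: bit b of word w covers hard register 32*w + b. A sketch decoding the MASK_REGS entry { 0x0, 0x0, 0x1fe0 } given above (illustration only):

#include <stdio.h>

/* Bits 5..12 of the third word are set in 0x1fe0, i.e. hard
   registers 64+5 = 69 through 64+12 = 76, matching FIRST_MASK_REG
   and LAST_MASK_REG.  */
int
main (void)
{
  unsigned mask_word = 0x1fe0;
  for (int bit = 0; bit < 32; bit++)
    if (mask_word & (1u << bit))
      printf ("hard reg %d (k%d)\n", 64 + bit, 64 + bit - 69);
  return 0;
}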
/* The same information, inverted:
@@ -1393,13 +1448,22 @@ enum reg_class
#define SSE_REG_P(X) (REG_P (X) && SSE_REGNO_P (REGNO (X)))
#define SSE_REGNO_P(N) \
(IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \
- || REX_SSE_REGNO_P (N))
+ || REX_SSE_REGNO_P (N) \
+ || EXT_REX_SSE_REGNO_P (N))
#define REX_SSE_REGNO_P(N) \
IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG)
+#define EXT_REX_SSE_REGNO_P(N) \
+ IN_RANGE ((N), FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)
+
#define SSE_REGNO(N) \
- ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
+ ((N) < 8 ? FIRST_SSE_REG + (N) \
+ : (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
+ : (FIRST_EXT_REX_SSE_REG + (N) - 16))
+
+#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
+#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define SSE_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
@@ -1452,10 +1516,10 @@ enum reg_class
/* Get_secondary_mem widens integral modes to BITS_PER_WORD.
There is no need to emit full 64 bit move on 64 bit targets
- for integral modes that can be moved using 32 bit move. */
+ for integral modes that can be moved using 8 bit move. */
#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
- (GET_MODE_BITSIZE (MODE) < 32 && INTEGRAL_MODE_P (MODE) \
- ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ (GET_MODE_BITSIZE (MODE) < 8 && INTEGRAL_MODE_P (MODE) \
+ ? mode_for_size (8, GET_MODE_CLASS (MODE), 0) \
: MODE)
/* Return a class of registers that cannot change FROM mode to TO mode. */
@@ -1952,7 +2016,12 @@ do { \
"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
- "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", \
+ "xmm16", "xmm17", "xmm18", "xmm19", \
+ "xmm20", "xmm21", "xmm22", "xmm23", \
+ "xmm24", "xmm25", "xmm26", "xmm27", \
+ "xmm28", "xmm29", "xmm30", "xmm31", \
+ "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" }
#define REGISTER_NAMES HI_REGISTER_NAMES
@@ -2273,9 +2342,13 @@ enum avx_u128_state
scheduling just increases amount of live registers at time and in
the turn amount of fxch instructions needed.
- ??? Maybe Pentium chips benefits from renaming, someone can try.... */
+ ??? Maybe Pentium chips benefit from renaming; someone can try....
+
+ Do not rename EVEX SSE registers to non-EVEX SSE registers. */
-#define HARD_REGNO_RENAME_OK(SRC, TARGET) !STACK_REGNO_P (SRC)
+#define HARD_REGNO_RENAME_OK(SRC, TARGET) (!STACK_REGNO_P (SRC) && \
+ (EXT_REX_SSE_REGNO_P (SRC) == \
+ EXT_REX_SSE_REGNO_P (TARGET)))
#define FASTCALL_PREFIX '@'
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c67ed31923e..fd27d295505 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -178,6 +178,12 @@
;; For BMI2 support
UNSPEC_PDEP
UNSPEC_PEXT
+
+ ;; For AVX512F mask support
+ UNSPEC_KIOR
+ UNSPEC_KXOR
+ UNSPEC_KAND
+ UNSPEC_KANDN
])
(define_c_enum "unspecv" [
@@ -235,6 +241,16 @@
(ROUND_NO_EXC 0x8)
])
+;; Constants to represent AVX512F embedded rounding
+(define_constants
+ [(ROUND_NEAREST_INT 0)
+ (ROUND_NEG_INF 1)
+ (ROUND_POS_INF 2)
+ (ROUND_ZERO 3)
+ (NO_ROUND 4)
+ (ROUND_SAE 5)
+ ])
+
;; Constants to represent pcomtrue/pcomfalse variants
(define_constants
[(PCOM_FALSE 0)
@@ -312,6 +328,30 @@
(XMM13_REG 50)
(XMM14_REG 51)
(XMM15_REG 52)
+ (XMM16_REG 53)
+ (XMM17_REG 54)
+ (XMM18_REG 55)
+ (XMM19_REG 56)
+ (XMM20_REG 57)
+ (XMM21_REG 58)
+ (XMM22_REG 59)
+ (XMM23_REG 60)
+ (XMM24_REG 61)
+ (XMM25_REG 62)
+ (XMM26_REG 63)
+ (XMM27_REG 64)
+ (XMM28_REG 65)
+ (XMM29_REG 66)
+ (XMM30_REG 67)
+ (XMM31_REG 68)
+ (MASK0_REG 69)
+ (MASK1_REG 70)
+ (MASK2_REG 71)
+ (MASK3_REG 72)
+ (MASK4_REG 73)
+ (MASK5_REG 74)
+ (MASK6_REG 75)
+ (MASK7_REG 76)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -344,13 +384,14 @@
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
- lwp,
+ lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
+ "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF,
+ V2DF,V2SF,V1DF,V8DF"
(const_string "unknown"))
;; The CPU unit operations uses.
@@ -362,7 +403,7 @@
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
- sseshuf,sseshuf1,ssemuladd,sse4arg")
+ sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
@@ -373,7 +414,7 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
- bitmanip,imulx")
+ bitmanip,imulx,msklog,mskmov")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -471,10 +512,13 @@
(const_int 0)))
;; Prefix used: original, VEX or maybe VEX.
-(define_attr "prefix" "orig,vex,maybe_vex"
- (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
- (const_string "vex")
- (const_string "orig")))
+(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
+ (cond [(eq_attr "mode" "OI,V8SF,V4DF")
+ (const_string "vex")
+ (eq_attr "mode" "XI,V16SF,V8DF")
+ (const_string "evex")
+ ]
+ (const_string "orig")))
;; VEX W bit is used.
(define_attr "prefix_vex_w" "" (const_int 0))
@@ -493,6 +537,9 @@
(symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
(symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
+;; 4-bytes evex prefix and 1 byte opcode.
+(define_attr "length_evex" "" (const_int 5))
+
;; Set when modrm byte is used.
(define_attr "modrm" ""
(cond [(eq_attr "type" "str,leave")
@@ -544,8 +591,17 @@
(plus (const_int 2)
(plus (attr "prefix_data16")
(attr "length_address")))
+ (ior (eq_attr "prefix" "evex")
+ (and (ior (eq_attr "prefix" "maybe_evex")
+ (eq_attr "prefix" "maybe_vex"))
+ (match_test "TARGET_AVX512F")))
+ (plus (attr "length_evex")
+ (plus (attr "length_immediate")
+ (plus (attr "modrm")
+ (attr "length_address"))))
(ior (eq_attr "prefix" "vex")
- (and (eq_attr "prefix" "maybe_vex")
+ (and (ior (eq_attr "prefix" "maybe_vex")
+ (eq_attr "prefix" "maybe_evex"))
(match_test "TARGET_AVX")))
(plus (attr "length_vex")
(plus (attr "length_immediate")
@@ -619,7 +675,7 @@
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
- mmx,mmxmov,mmxcmp,mmxcvt")
+ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
(match_operand 2 "memory_operand"))
(const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
@@ -663,7 +719,7 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi2,fma4,fma"
+ avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
(const_string "base"))
(define_attr "enabled" ""
@@ -689,6 +745,10 @@
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
(eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
(eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
+ (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
+ (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
+ (eq_attr "isa" "fma_avx512f")
+ (symbol_ref "TARGET_FMA || TARGET_AVX512F")
]
(const_int 1)))
@@ -701,6 +761,8 @@
(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+(define_code_iterator multdiv [mult div])
+
;; Base name for define_insn
(define_code_attr plusminus_insn
[(plus "add") (ss_plus "ssadd") (us_plus "usadd")
@@ -712,6 +774,8 @@
(minus "sub") (ss_minus "subs") (us_minus "subus")])
(define_code_attr plusminus_carry_mnemonic
[(plus "adc") (minus "sbb")])
+(define_code_attr multdiv_mnemonic
+ [(mult "mul") (div "div")])
;; Mark commutative operators as such in constraints.
(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
@@ -734,6 +798,7 @@
;; Mapping of logic operators
(define_code_iterator any_logic [and ior xor])
(define_code_iterator any_or [ior xor])
+(define_code_iterator fpint_logic [and xor])
;; Base name for insn mnemonic.
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
@@ -781,6 +846,15 @@
(define_code_attr s [(sign_extend "s") (zero_extend "u")])
(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
+;; Used in signed and unsigned truncations.
+(define_code_iterator any_truncate [ss_truncate truncate us_truncate])
+;; Instruction suffix for truncations.
+(define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate "us")])
+
+;; Used in signed and unsigned fix.
+(define_code_iterator any_fix [fix unsigned_fix])
+(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
+
;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])
@@ -924,10 +998,12 @@
;; SSE instruction suffix for various modes
(define_mode_attr ssemodesuffix
[(SF "ss") (DF "sd")
+ (V16SF "ps") (V8DF "pd")
(V8SF "ps") (V4DF "pd")
(V4SF "ps") (V2DF "pd")
(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
- (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")])
+ (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
+ (V64QI "b") (V16SI "d") (V8DI "q")])
;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
@@ -1649,6 +1725,12 @@
;; Move instructions.
+(define_expand "movxi"
+ [(set (match_operand:XI 0 "nonimmediate_operand")
+ (match_operand:XI 1 "general_operand"))]
+ "TARGET_AVX512F"
+ "ix86_expand_move (XImode, operands); DONE;")
+
;; Reload patterns to support multi-word load/store
;; with non-offsetable address.
(define_expand "reload_noff_store"
@@ -1746,6 +1828,26 @@
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
+(define_insn "*movxi_internal_avx512f"
+ [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ return "vmovdqu32\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
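As a usage illustration (not part of the patch), a plain 64-byte vector copy compiled with -O2 -mavx512f exercises moves of this shape; whether it goes through XImode or the corresponding V16SImode pattern, the emitted instruction is one of the vmovdq* forms above.

typedef int v16si __attribute__ ((vector_size (64)));

void
copy512 (v16si *dst, const v16si *src)
{
  *dst = *src;   /* one 512-bit load plus one 512-bit store */
}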
(define_insn "*movoi_internal_avx"
[(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m")
(match_operand:OI 1 "vector_move_operand" "C ,xm,x"))]
@@ -1857,9 +1959,9 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,*x,r ,*Yj ,*Yn"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -1896,6 +1998,8 @@
return "%vmovq\t{%1, %0|%0, %1}";
case MODE_TI:
return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
case MODE_V2SF:
gcc_assert (!TARGET_AVX);
@@ -1989,7 +2093,9 @@
(cond [(eq_attr "alternative" "2")
(const_string "SI")
(eq_attr "alternative" "12,13")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(match_test "TARGET_AVX512F")
+ (const_string "XI")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -2018,9 +2124,9 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2038,6 +2144,8 @@
return "%vmovd\t{%1, %0|%0, %1}";
case MODE_TI:
return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
@@ -2116,7 +2224,9 @@
(cond [(eq_attr "alternative" "2,3")
(const_string "DI")
(eq_attr "alternative" "6,7")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(match_test "TARGET_AVX512F")
+ (const_string "XI")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -2135,8 +2245,8 @@
(const_string "SI")))])
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,Yk,rm")
+ (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,Ykrm,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2145,6 +2255,10 @@
/* movzwl is faster than movw on p2 due to partial word stalls,
though not as fast as an aligned movl. */
return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ return "kmovw\t{%k1, %k0|%k0, %k1}";
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2162,11 +2276,17 @@
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand"))
(const_string "imov")
+ (eq_attr "alternative" "4,5")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "4,5")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
@@ -2191,8 +2311,8 @@
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_internal"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
- (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,Yk,r")
+ (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn,Ykrm,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2200,6 +2320,10 @@
case TYPE_IMOVX:
gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ return "kmovw\t{%k1, %k0|%k0, %k1}";
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2219,11 +2343,17 @@
(const_string "imov")
(eq_attr "alternative" "3,5")
(const_string "imovx")
+ (eq_attr "alternative" "7,8")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,5")
(const_string "SI")
@@ -2703,9 +2833,9 @@
;; Possible store forwarding (partial memory) stall in alternative 4.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi")
+ "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi")
(match_operand:DF 1 "general_operand"
- "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))]
+ "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2750,6 +2880,8 @@
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V8DF:
+ return "vmovapd\t{%g1, %g0|%g0, %g1}";
case MODE_V2DF:
return "%vmovapd\t{%1, %0|%0, %1}";
@@ -2824,6 +2956,8 @@
(eq_attr "alternative" "9,13")
(cond [(not (match_test "TARGET_SSE2"))
(const_string "V4SF")
+ (match_test "TARGET_AVX512F")
+ (const_string "XI")
(match_test "TARGET_AVX")
(const_string "V2DF")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2839,7 +2973,9 @@
/* movaps is one byte shorter for non-AVX targets. */
(eq_attr "alternative" "10,14")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(match_test "TARGET_AVX512F")
+ (const_string "V8DF")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
@@ -2872,9 +3008,9 @@
(define_insn "*movsf_internal"
[(set (match_operand:SF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym")
+ "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym")
(match_operand:SF 1 "general_operand"
- "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yj,r ,*y ,m ,*y,*Yn,r"))]
+ "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2907,6 +3043,8 @@
return "vmovss\t{%1, %0, %0|%0, %0, %1}";
return "%vmovss\t{%1, %0|%0, %1}";
+ case MODE_V16SF:
+ return "vmovaps\t{%g1, %g0|%g0, %g1}";
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
@@ -2960,6 +3098,8 @@
(eq_attr "alternative" "5")
(cond [(not (match_test "TARGET_SSE2"))
(const_string "V4SF")
+ (match_test "TARGET_AVX512F")
+ (const_string "V16SF")
(match_test "TARGET_AVX")
(const_string "V4SF")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2979,10 +3119,14 @@
of instructions to load just part of the register. It is
better to maintain the whole registers in single format
to avoid problems on using packed logical operations. */
- (and (eq_attr "alternative" "6")
- (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
- (match_test "TARGET_SSE_SPLIT_REGS")))
- (const_string "V4SF")
+ (eq_attr "alternative" "6")
+ (cond [(match_test "TARGET_AVX512F")
+ (const_string "V16SF")
+ (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (match_test "TARGET_SSE_SPLIT_REGS"))
+ (const_string "V4SF")
+ ]
+ (const_string "SF"))
]
(const_string "SF")))])
@@ -7524,6 +7668,18 @@
(const_string "*")))
(set_attr "mode" "HI,HI,SI")])
+(define_insn "kandn<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KANDN))]
+ "TARGET_AVX512F"
+ "kandnw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
@@ -7549,6 +7705,18 @@
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
+(define_insn "kand<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KAND))]
+ "TARGET_AVX512F"
+ "kandw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
;; Turn *anddi_1 into *andsi_1_zext if possible.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -7952,6 +8120,81 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "kior<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KIOR))]
+ "TARGET_AVX512F"
+ "korw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "kxor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KXOR))]
+ "TARGET_AVX512F"
+ "kxorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (not:SWI12
+ (xor:SWI12
+ (match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk"))))]
+ "TARGET_AVX512F"
+ "kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
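The mask logic insns are wrapped in unspecs rather than plain and/ior/xor rtx codes, so their semantics are easiest to state directly; a scalar sketch of the word-sized k instructions (kandnw is the only non-obvious one, complementing its first source):

#include <stdint.h>
#include <stdio.h>

static uint16_t kandw  (uint16_t a, uint16_t b) { return a & b; }
static uint16_t kandnw (uint16_t a, uint16_t b) { return ~a & b; }
static uint16_t korw   (uint16_t a, uint16_t b) { return a | b; }
static uint16_t kxorw  (uint16_t a, uint16_t b) { return a ^ b; }
static uint16_t kxnorw (uint16_t a, uint16_t b) { return ~(a ^ b); }

int
main (void)
{
  printf ("0x%04x 0x%04x\n",
          kandnw (0x00ff, 0x0f0f),      /* 0x0f00 */
          kxnorw (0xffff, 0xffff));     /* 0xffff */
  return 0;
}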
+(define_insn "kortestzhi"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "%Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int 0)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kortestchi"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "%Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int -1)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
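kortestw sets ZF when the OR of the two masks is all zeros and CF when it is all ones; the two patterns above expose exactly those flags. A scalar model (illustration only):

#include <stdint.h>
#include <stdio.h>

static int
kortest_zf (uint16_t a, uint16_t b)
{
  return (uint16_t) (a | b) == 0;       /* kortestzhi */
}

static int
kortest_cf (uint16_t a, uint16_t b)
{
  return (uint16_t) (a | b) == 0xffff;  /* kortestchi */
}

int
main (void)
{
  printf ("%d %d\n", kortest_zf (0, 0), kortest_cf (0xff00, 0x00ff)); /* 1 1 */
  return 0;
}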
+(define_insn "kunpckhi"
+ [(set (match_operand:HI 0 "register_operand" "=Yk")
+ (ior:HI
+ (ashift:HI
+ (match_operand:HI 1 "register_operand" "Yk")
+ (const_int 8))
+ (zero_extend:HI (subreg:QI (match_operand:HI 2 "register_operand" "Yk") 0))))]
+ "TARGET_AVX512F"
+ "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
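The RTL above spells out the kunpckbw semantics: the low byte of operand 1 is shifted into the high half and the low byte of operand 2 fills the low half. A scalar model:

#include <stdint.h>
#include <stdio.h>

static uint16_t
kunpckbw (uint16_t a, uint16_t b)
{
  return (uint16_t) ((a << 8) | (b & 0xff));
}

int
main (void)
{
  printf ("0x%04x\n", kunpckbw (0x00ad, 0x00be)); /* 0xadbe */
  return 0;
}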
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8521,23 +8764,38 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
- (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"not{<imodesuffix>}\t%0"
[(set_attr "type" "negnot")
(set_attr "mode" "<MODE>")])
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yk")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,Yk")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "@
+ not{w}\t%0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512f")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
- (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yk")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,Yk")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
not{b}\t%0
- not{l}\t%k0"
- [(set_attr "type" "negnot")
- (set_attr "mode" "QI,SI")])
+ not{l}\t%k0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,*,avx512f")
+ (set_attr "type" "negnot,negnot,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "mode" "QI,SI,QI")])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -12629,10 +12887,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "*fop_<mode>_comm_sse"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,v")
(match_operator:MODEF 3 "binary_fp_operator"
- [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
- (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& COMMUTATIVE_ARITH_P (operands[3])
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -15977,10 +16235,10 @@
;; are undefined in this condition, we're certain this is correct.
(define_insn "<code><mode>3"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,v")
(smaxmin:MODEF
- (match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
- (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -16261,7 +16519,7 @@
})
;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
-
+;; Do not split instructions with mask registers.
(define_split
[(set (match_operand 0 "register_operand")
(match_operator 3 "promotable_binary_operator"
@@ -16275,7 +16533,10 @@
|| !CONST_INT_P (operands[2])
|| satisfies_constraint_K (operands[2])))
|| (GET_MODE (operands[0]) == QImode
- && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+ && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))
+ && (! ANY_MASK_REG_P (operands[0])
+ || ! ANY_MASK_REG_P (operands[1])
+ || ! ANY_MASK_REG_P (operands[2]))"
[(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
(clobber (reg:CC FLAGS_REG))])]
@@ -16360,6 +16621,7 @@
operands[1] = gen_lowpart (SImode, operands[1]);
})
+;; Do not split instructions with mask regs.
(define_split
[(set (match_operand 0 "register_operand")
(not (match_operand 1 "register_operand")))]
@@ -16367,7 +16629,9 @@
&& (GET_MODE (operands[0]) == HImode
|| (GET_MODE (operands[0]) == QImode
&& (TARGET_PROMOTE_QImode
- || optimize_insn_for_size_p ())))"
+ || optimize_insn_for_size_p ())))
+ && (! ANY_MASK_REG_P (operands[0])
+ || ! ANY_MASK_REG_P (operands[1]))"
[(set (match_dup 0)
(not:SI (match_dup 1)))]
{
@@ -17157,7 +17421,7 @@
[(prefetch (match_operand 0 "address_operand")
(match_operand:SI 1 "const_int_operand")
(match_operand:SI 2 "const_int_operand"))]
- "TARGET_PREFETCH_SSE || TARGET_PRFCHW"
+ "TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_AVX512PF"
{
bool write = INTVAL (operands[1]) != 0;
int locality = INTVAL (operands[2]);
@@ -17170,6 +17434,8 @@
of locality. */
if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE))
operands[2] = GEN_INT (3);
+ else if (TARGET_AVX512PF && (write || !TARGET_PREFETCH_SSE))
+ operands[2] = GEN_INT (1);
else
operands[1] = const0_rtx;
})
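As a usage illustration (assuming plain __builtin_prefetch and no other prefetch ISA enabled), a write prefetch is now kept and lowered to prefetchwt1 under -mavx512pf, with the locality argument rewritten to 1 as above:

void
touch (char *p, long n)
{
  for (long i = 0; i < n; i++)
    {
      __builtin_prefetch (p + i + 512, /* write */ 1, /* locality */ 1);
      p[i] = 0;
    }
}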
@@ -17211,6 +17477,18 @@
(symbol_ref "memory_address_length (operands[0], false)"))
(set_attr "memory" "none")])
+(define_insn "*prefetch_avx512pf_<mode>"
+ [(prefetch (match_operand:P 0 "address_operand" "p")
+ (const_int 1)
+ (const_int 1))]
+ "TARGET_AVX512PF"
+ "prefetchwt1\t%a0";
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0], false)"))
+ (set_attr "memory" "none")])
+
(define_expand "stack_protect_set"
[(match_operand 0 "memory_operand")
(match_operand 1 "memory_operand")]
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9fbf5451e9c..57eddbee952 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -498,6 +498,22 @@ mavx2
Target Report Mask(ISA_AVX2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation
+mavx512f
+Target Report Mask(ISA_AVX512F) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation
+
+mavx512pf
+Target Report Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512PF built-in functions and code generation
+
+mavx512er
+Target Report Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512ER built-in functions and code generation
+
+mavx512cd
+Target Report Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512CD built-in functions and code generation
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
@@ -590,6 +606,10 @@ maes
Target Report Mask(ISA_AES) Var(ix86_isa_flags) Save
Support AES built-in functions and code generation
+msha
+Target Report Mask(ISA_SHA) Var(ix86_isa_flags) Save
+Support SHA1 and SHA256 built-in functions and code generation
+
mpclmul
Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) Save
Support PCLMUL built-in functions and code generation
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index e825c34a256..4fdf0000006 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -42,6 +42,16 @@
#include <avx2intrin.h>
+#include <avx512fintrin.h>
+
+#include <avx512erintrin.h>
+
+#include <avx512pfintrin.h>
+
+#include <avx512cdintrin.h>
+
+#include <shaintrin.h>
+
#include <lzcntintrin.h>
#include <bmiintrin.h>
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 12c062687c0..8f2688826cd 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -78,9 +78,9 @@
(define_insn "*mov<mode>_internal"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand"
- "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,x,x,x,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi")
+ "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,v,v,v,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi")
(match_operand:MMXMODE 1 "vector_move_operand"
- "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,x,m,x,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))]
+ "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,v,m,v,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))]
"TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b64ef6999ee..668ec495a7d 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -672,6 +672,16 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+;; Match 0 to 4.
+(define_predicate "const_0_to_4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 4)")))
+
+;; Match 0 to 5.
+(define_predicate "const_0_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 5)")))
+
;; Match 0 to 7.
(define_predicate "const_0_to_7_operand"
(and (match_code "const_int")
@@ -737,16 +747,66 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 6, 7)")))
+;; Match 8 to 9.
+(define_predicate "const_8_to_9_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 9)")))
+
;; Match 8 to 11.
(define_predicate "const_8_to_11_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 8, 11)")))
+;; Match 8 to 15.
+(define_predicate "const_8_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 15)")))
+
+;; Match 10 to 11.
+(define_predicate "const_10_to_11_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 10, 11)")))
+
+;; Match 12 to 13.
+(define_predicate "const_12_to_13_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 12, 13)")))
+
;; Match 12 to 15.
(define_predicate "const_12_to_15_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 12, 15)")))
+;; Match 14 to 15.
+(define_predicate "const_14_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 14, 15)")))
+
+;; Match 16 to 19.
+(define_predicate "const_16_to_19_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 16, 19)")))
+
+;; Match 16 to 31.
+(define_predicate "const_16_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 16, 31)")))
+
+;; Match 20 to 23.
+(define_predicate "const_20_to_23_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 20, 23)")))
+
+;; Match 24 to 27.
+(define_predicate "const_24_to_27_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 24, 27)")))
+
+;; Match 28 to 31.
+(define_predicate "const_28_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 28, 31)")))
+
;; True if this is a constant appropriate for an increment or decrement.
(define_predicate "incdec_operand"
(match_code "const_int")
@@ -1317,3 +1377,8 @@
(define_predicate "general_vector_operand"
(ior (match_operand 0 "nonimmediate_operand")
(match_code "const_vector")))
+
+;; Return true if OP is either the constant -1 or is stored in a register.
+(define_predicate "register_or_constm1_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "op == constm1_rtx")))
diff --git a/gcc/config/i386/shaintrin.h b/gcc/config/i386/shaintrin.h
new file mode 100644
index 00000000000..58c5c5d1206
--- /dev/null
+++ b/gcc/config/i386/shaintrin.h
@@ -0,0 +1,99 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _SHAINTRIN_H_INCLUDED
+#define _SHAINTRIN_H_INCLUDED
+
+#ifndef __SHA__
+#pragma GCC push_options
+#pragma GCC target("sha")
+#define __DISABLE_SHA__
+#endif /* __SHA__ */
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1msg2_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_sha1msg2 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1nexte_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_sha1nexte ((__v4si) __A, (__v4si) __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha1rnds4_epu32 (__m128i __A, __m128i __B, const int __I)
+{
+ return (__m128i) __builtin_ia32_sha1rnds4 ((__v4si) __A, (__v4si) __B, __I);
+}
+#else
+#define _mm_sha1rnds4_epu32(A, B, I) \
+ ((__m128i) __builtin_ia32_sha1rnds4 ((__v4si)(__m128i)A, \
+ (__v4si)(__m128i)B, (int)I))
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg1_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_sha256msg1 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256msg2_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_sha256msg2 ((__v4si) __A, (__v4si) __B);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_sha256rnds2 ((__v4si) __A, (__v4si) __B,
+ (__v4si) __C);
+}
+
+#ifdef __DISABLE_SHA__
+#undef __DISABLE_SHA__
+#pragma GCC pop_options
+#endif /* __DISABLE_SHA__ */
+
+#endif /* _SHAINTRIN_H_INCLUDED */
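A hypothetical usage sketch for the new intrinsics (the function name and data flow are illustrative; only the intrinsic itself comes from the header, and immintrin.h is the only supported entry point):

/* Compile with -msha.  */
#include <immintrin.h>

__m128i
sha256_two_rounds (__m128i state0, __m128i state1, __m128i msg_plus_k)
{
  /* Performs two SHA-256 rounds; the low 32-bit elements of the
     third operand carry message words with round constants
     pre-added, per the three-operand signature defined above.  */
  return _mm_sha256rnds2_epu32 (state0, state1, msg_plus_k);
}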
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 439749877f2..89851fefeac 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -83,6 +83,52 @@
UNSPEC_VPERMTI
UNSPEC_GATHER
UNSPEC_VSIBADDR
+
+ ;; For AVX512F support
+ UNSPEC_BLENDM
+ UNSPEC_VPERMI2
+ UNSPEC_VPERMI2_MASK
+ UNSPEC_VPERMT2
+ UNSPEC_UNSIGNED_FIX_NOTRUNC
+ UNSPEC_UNSIGNED_PCMP
+ UNSPEC_TESTM
+ UNSPEC_TESTNM
+ UNSPEC_SCATTER
+ UNSPEC_RCP14
+ UNSPEC_RSQRT14
+ UNSPEC_FIXUPIMM
+ UNSPEC_SCALEF
+ UNSPEC_VTERNLOG
+ UNSPEC_GETEXP
+ UNSPEC_GETMANT
+ UNSPEC_ALIGN
+ UNSPEC_CONFLICT
+ UNSPEC_COMPRESS
+ UNSPEC_COMPRESS_STORE
+ UNSPEC_EXPAND
+ UNSPEC_MASKED_EQ
+ UNSPEC_MASKED_GT
+
+ ;; For the embedded rounding feature
+ UNSPEC_EMBEDDED_ROUNDING
+
+ ;; For AVX512PF support
+ UNSPEC_GATHER_PREFETCH
+ UNSPEC_SCATTER_PREFETCH
+
+ ;; For AVX512ER support
+ UNSPEC_EXP2
+ UNSPEC_RCP28
+ UNSPEC_RSQRT28
+
+ ;; For SHA support
+ UNSPEC_SHA1MSG1
+ UNSPEC_SHA1MSG2
+ UNSPEC_SHA1NEXTE
+ UNSPEC_SHA1RNDS4
+ UNSPEC_SHA256MSG1
+ UNSPEC_SHA256MSG2
+ UNSPEC_SHA256RNDS2
])
(define_c_enum "unspecv" [
@@ -97,22 +143,22 @@
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
- [(V32QI "TARGET_AVX") V16QI
- (V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V2TI "TARGET_AVX") V1TI
- (V8SF "TARGET_AVX") V4SF
- (V4DF "TARGET_AVX") V2DF])
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All vector modes
(define_mode_iterator V
[(V32QI "TARGET_AVX") V16QI
(V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
- (V8SF "TARGET_AVX") V4SF
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; All 128bit vector modes
(define_mode_iterator V_128
@@ -122,19 +168,41 @@
(define_mode_iterator V_256
[V32QI V16HI V8SI V4DI V8SF V4DF])
+;; All 512bit vector modes
+(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
+
+;; All 256bit and 512bit vector modes
+(define_mode_iterator V_256_512
+ [V32QI V16HI V8SI V4DI V8SF V4DF
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+
;; All vector float modes
(define_mode_iterator VF
[(V8SF "TARGET_AVX") V4SF
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+(define_mode_iterator VF_AVX512F
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+
;; All SFmode vector float modes
(define_mode_iterator VF1
[(V8SF "TARGET_AVX") V4SF])
+(define_mode_iterator VF1_AVX512F
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
+
;; All DFmode vector float modes
(define_mode_iterator VF2
[(V4DF "TARGET_AVX") V2DF])
+(define_mode_iterator VF2_AVX512F
+ [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+
+(define_mode_iterator VF2_512_256
+ [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
+
;; All 128bit vector float modes
(define_mode_iterator VF_128
[V4SF (V2DF "TARGET_SSE2")])
@@ -143,9 +211,18 @@
(define_mode_iterator VF_256
[V8SF V4DF])
+;; All 512bit vector float modes
+(define_mode_iterator VF_512
+ [V16SF V8DF])
+
+;; 128bit and 512bit float modes
+(define_mode_iterator VF_128_512
+ [V4SF V2DF V16SF V8DF])
+
;; All vector integer modes
(define_mode_iterator VI
- [(V32QI "TARGET_AVX") V16QI
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32QI "TARGET_AVX") V16QI
(V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
@@ -153,16 +230,20 @@
(define_mode_iterator VI_AVX2
[(V32QI "TARGET_AVX2") V16QI
(V16HI "TARGET_AVX2") V8HI
- (V8SI "TARGET_AVX2") V4SI
- (V4DI "TARGET_AVX2") V2DI])
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; All QImode vector integer modes
(define_mode_iterator VI1
[(V32QI "TARGET_AVX") V16QI])
+(define_mode_iterator VI_UNALIGNED_LOADSTORE
+ [(V32QI "TARGET_AVX") V16QI
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+
;; All DImode vector integer modes
(define_mode_iterator VI8
- [(V4DI "TARGET_AVX") V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI1_AVX2
[(V32QI "TARGET_AVX2") V16QI])
@@ -170,12 +251,36 @@
(define_mode_iterator VI2_AVX2
[(V16HI "TARGET_AVX2") V8HI])
+(define_mode_iterator VI2_AVX512F
+ [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI4_AVX
+ [(V8SI "TARGET_AVX") V4SI])
+
(define_mode_iterator VI4_AVX2
[(V8SI "TARGET_AVX2") V4SI])
+(define_mode_iterator VI4_AVX512F
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
+
+(define_mode_iterator VI48_AVX512F
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F")])
+
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI8_AVX2_AVX512F
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+
+;; All V8D* modes
+(define_mode_iterator V8FI
+ [V8DF V8DI])
+
+;; All V16S* modes
+(define_mode_iterator V16FI
+ [V16SF V16SI])
+
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
[(V2TI "TARGET_AVX2") V1TI])
@@ -192,19 +297,35 @@
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
+(define_mode_iterator VI124_AVX2_48_AVX512F
+ [(V32QI "TARGET_AVX2") V16QI
+ (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F")])
+
+(define_mode_iterator VI124_AVX512F
+ [(V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
+
(define_mode_iterator VI124_AVX2
[(V32QI "TARGET_AVX2") V16QI
(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI248_AVX2
+(define_mode_iterator VI248_AVX512F
+ [(V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+
+(define_mode_iterator VI248_AVX2_8_AVX512F
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI
- (V4DI "TARGET_AVX2") V2DI])
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
-(define_mode_iterator VI48_AVX2
- [(V8SI "TARGET_AVX2") V4SI
- (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI48_AVX2_48_AVX512F
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator V48_AVX2
[V4SF V2DF
@@ -212,11 +333,18 @@
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_attr sse2_avx_avx512f
+ [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+ (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
+ (V8DI "avx512f")
+ (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
+ (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
+
(define_mode_attr sse2_avx2
[(V16QI "sse2") (V32QI "avx2")
(V8HI "sse2") (V16HI "avx2")
- (V4SI "sse2") (V8SI "avx2")
- (V2DI "sse2") (V4DI "avx2")
+ (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
+ (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
(V1TI "sse2") (V2TI "avx2")])
(define_mode_attr ssse3_avx2
@@ -229,7 +357,7 @@
(define_mode_attr sse4_1_avx2
[(V16QI "sse4_1") (V32QI "avx2")
(V8HI "sse4_1") (V16HI "avx2")
- (V4SI "sse4_1") (V8SI "avx2")
+ (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
(V2DI "sse4_1") (V4DI "avx2")])
(define_mode_attr avx_avx2
@@ -244,12 +372,34 @@
(V4SI "vec") (V8SI "avx2")
(V2DI "vec") (V4DI "avx2")])
+(define_mode_attr avx2_avx512f
+ [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
+ (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
+ (V8SF "avx2") (V16SF "avx512f")
+ (V4DF "avx2") (V8DF "avx512f")])
+
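+;; Element-type letter spliced into mnemonics such as
+;; vextract<shuffletype>64x4 in the mov<mode>_internal workaround below.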
+(define_mode_attr shuffletype
+ [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
+ (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
+ (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
+ (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i")
+ (V64QI "i") (V1TI "i") (V2TI "i")])
+
+(define_mode_attr ssequartermode
+ [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
+
(define_mode_attr ssedoublemode
- [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
+ [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
+ (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
(V32QI "V32HI") (V16QI "V16HI")])
+(define_mode_attr ssefixupmode
+ [(V16SF "V16SI") (V4SF "V4SI") (V8DF "V8DI") (V2DF "V2DI")])
+
(define_mode_attr ssebytemode
- [(V4DI "V32QI") (V2DI "V16QI")])
+ [(V4DI "V32QI") (V2DI "V16QI")
+ (V8SI "V32QI") (V4SI "V16QI")
+ (V16HI "V32QI") (V8HI "V16QI")])
;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
@@ -257,7 +407,10 @@
;; All 256bit vector integer modes
(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
-;; Random 128bit vector integer mode combinations
+;; All 512bit vector integer modes
+(define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
+
+;; Various 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
(define_mode_iterator VI14_128 [V16QI V4SI])
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
@@ -266,58 +419,85 @@
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI48_128 [V4SI V2DI])
-;; Random 256bit vector integer mode combinations
-(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
+;; Various 256bit and 512bit vector integer mode combinations
+(define_mode_iterator VI124_256_48_512
+ [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256 [V8SI V4DI])
+(define_mode_iterator VI48_512 [V16SI V8DI])
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
+(define_mode_iterator VI8F_256_512
+ [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+(define_mode_iterator VI48F_256_512
+ [V8SI V8SF
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
[(SF "sse") (DF "sse2")
(V4SF "sse") (V2DF "sse2")
- (V8SF "avx") (V4DF "avx")])
+ (V16SF "avx512f") (V8SF "avx")
+ (V8DF "avx512f") (V4DF "avx")])
(define_mode_attr sse2
- [(V16QI "sse2") (V32QI "avx")
- (V2DI "sse2") (V4DI "avx")])
+ [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+ (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
(define_mode_attr sse3
[(V16QI "sse3") (V32QI "avx")])
(define_mode_attr sse4_1
[(V4SF "sse4_1") (V2DF "sse4_1")
- (V8SF "avx") (V4DF "avx")])
+ (V8SF "avx") (V4DF "avx")
+ (V8DF "avx512f")])
(define_mode_attr avxsizesuffix
- [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
+ [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
+ (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
(V16QI "") (V8HI "") (V4SI "") (V2DI "")
+ (V16SF "512") (V8DF "512")
(V8SF "256") (V4DF "256")
(V4SF "") (V2DF "")])
;; SSE instruction mode
(define_mode_attr sseinsnmode
- [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
+ [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
+ (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
+ (V16SF "V16SF") (V8DF "V8DF")
(V8SF "V8SF") (V4DF "V4DF")
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI")])
+;; Mapping of vector modes to the corresponding mask mode
+(define_mode_attr avx512fmaskmode
+ [(V16QI "HI")
+ (V16HI "HI") (V8HI "QI")
+ (V16SI "HI") (V8SI "QI") (V4SI "QI")
+ (V8DI "QI") (V4DI "QI") (V2DI "QI")
+ (V16SF "HI") (V8SF "QI") (V4SF "QI")
+ (V8DF "QI") (V4DF "QI") (V2DF "QI")])
+
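+;; For example, the 16-element V16SF and V16SI map to an HImode mask
+;; (one bit per element), while the 8-element V8DF and V8DI map to QImode.
+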
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
- [(V8SF "V8SI") (V4DF "V4DI")
- (V4SF "V4SI") (V2DF "V2DI")
- (V8SI "V8SI") (V4DI "V4DI")
- (V4SI "V4SI") (V2DI "V2DI")
- (V16HI "V16HI") (V8HI "V8HI")
+ [(V16SF "V16SI") (V8DF "V8DI")
+ (V8SF "V8SI") (V4DF "V4DI")
+ (V4SF "V4SI") (V2DF "V2DI")
+ (V16SI "V16SI") (V8DI "V8DI")
+ (V8SI "V8SI") (V4DI "V4DI")
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V16HI "V16HI") (V8HI "V8HI")
(V32QI "V32QI") (V16QI "V16QI")])
(define_mode_attr sseintvecmodelower
- [(V8SF "v8si") (V4DF "v4di")
+ [(V16SF "v16si")
+ (V8SF "v8si") (V4DF "v4di")
(V4SF "v4si") (V2DF "v2di")
(V8SI "v8si") (V4DI "v4di")
(V4SI "v4si") (V2DI "v2di")
@@ -333,15 +513,20 @@
;; Mapping of vector modes to a vector mode of half size
(define_mode_attr ssehalfvecmode
- [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
- (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
- (V8SF "V4SF") (V4DF "V2DF")
- (V4SF "V2SF")])
+ [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
+ (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
+ (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
+ (V16SF "V8SF") (V8DF "V4DF")
+ (V8SF "V4SF") (V4DF "V2DF")
+ (V4SF "V2SF")])
;; Mapping of vector modes to packed single mode of the same size
+;; TODO: it is questionable whether we should extend this to 512-bit modes.
(define_mode_attr ssePSmode
- [(V32QI "V8SF") (V16QI "V4SF")
- (V16HI "V8SF") (V8HI "V4SF")
+ [(V16SI "V16SF") (V8DF "V16SF")
+ (V16SF "V16SF") (V8DI "V16SF")
+ (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
+ (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
(V8SI "V8SF") (V4SI "V4SF")
(V4DI "V8SF") (V2DI "V4SF")
(V2TI "V8SF") (V1TI "V4SF")
@@ -350,10 +535,21 @@
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
- [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
- (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
- (V8SF "SF") (V4DF "DF")
- (V4SF "SF") (V2DF "DF")])
+ [(V64QI "QI") (V32QI "QI") (V16QI "QI")
+ (V32HI "HI") (V16HI "HI") (V8HI "HI")
+ (V16SI "SI") (V8SI "SI") (V4SI "SI")
+ (V8DI "DI") (V4DI "DI") (V2DI "DI")
+ (V16SF "SF") (V8SF "SF") (V4SF "SF")
+ (V8DF "DF") (V4DF "DF") (V2DF "DF")])
+
+;; Mapping of vector modes to their 128bit counterparts
+(define_mode_attr ssexmmmode
+ [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
+ (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
+ (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
+ (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
+ (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
+ (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
;; Pointer size override for scalar modes (Intel asm dialect)
(define_mode_attr iptr
@@ -365,8 +561,10 @@
;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
- [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
+ [(V64QI "64") (V16SI "16") (V8DI "8")
+ (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
(V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
+ (V16SF "16") (V8DF "8")
(V8SF "8") (V4DF "4")
(V4SF "4") (V2DF "2")])
@@ -377,12 +575,19 @@
(V8SF "7") (V4DF "3")
(V4SF "3") (V2DF "1")])
+(define_mode_attr ssescalarsize
+ [(V8DI "64") (V4DI "64") (V2DI "64")
+ (V32HI "16") (V16HI "16") (V8HI "16")
+ (V16SI "32") (V8SI "32") (V4SI "32")])
+
;; SSE prefix for integer vector modes
(define_mode_attr sseintprefix
- [(V2DI "p") (V2DF "")
- (V4DI "p") (V4DF "")
- (V4SI "p") (V4SF "")
- (V8SI "p") (V8SF "")])
+ [(V2DI "p") (V2DF "")
+ (V4DI "p") (V4DF "")
+ (V8DI "p") (V8DF "")
+ (V4SI "p") (V4SF "")
+ (V8SI "p") (V8SF "")
+ (V16SI "p") (V16SF "")])
;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
@@ -390,16 +595,20 @@
(V8SF "ss") (V4DF "sd")
(V4SF "ss") (V2DF "sd")
(V8SI "ss") (V4DI "sd")
+ (V16SF "ss") (V8DF "sd")
+ (V16SI "ss") (V8DI "sd")
(V4SI "d")])
;; Pack/unpack vector modes
(define_mode_attr sseunpackmode
[(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
- (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
+ (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
+ (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
(define_mode_attr ssepackmode
[(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
- (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
+ (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
+ (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
@@ -412,9 +621,11 @@
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
+;; i64x4 or f64x4 for 512bit modes.
(define_mode_attr i128
- [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
- (V8SI "%~128") (V4DI "%~128")])
+ [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
+ (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
+ (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
@@ -423,6 +634,13 @@
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+;; Mapping suffixes for broadcast
+(define_mode_attr bcstscalarsuff
+ [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
+
+;; Include define_subst patterns for instructions with mask
+(include "subst.md")
+
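+;; Roughly: a pattern whose name mentions <mask_name> is duplicated by the
+;; corresponding define_subst into an unmasked variant and a "_mask"
+;; variant, with <mask_operand2> and friends expanding to the extra mask
+;; operands only in the masked copy; <round_name> and <sd_maskz_name>
+;; below work the same way.
+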
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -444,20 +662,64 @@
})
(define_insn "*mov<mode>_internal"
- [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
- (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ [(set (match_operand:V16 0 "nonimmediate_operand" "=v,v ,m")
+ (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
"TARGET_SSE
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
+ int mode = get_attr_mode (insn);
switch (which_alternative)
{
case 0:
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
- switch (get_attr_mode (insn))
+      /* There are no EVEX-encoded vmov* instructions for sizes smaller
+	 than 64 bytes in avx512f, so we need workarounds to access the
+	 EVEX-only SSE registers 16-31: go through the containing zmm
+	 register, whose name the %g operand modifier prints.  */
+ if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
+ && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
+ || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
{
+ if (memory_operand (operands[0], <MODE>mode))
+ {
+ if (GET_MODE_SIZE (<MODE>mode) == 32)
+ return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else if (GET_MODE_SIZE (<MODE>mode) == 16)
+ return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else
+ gcc_unreachable ();
+ }
+ else if (memory_operand (operands[1], <MODE>mode))
+ {
+ if (GET_MODE_SIZE (<MODE>mode) == 32)
+		return "vinsert<shuffletype>64x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, 0x0}";
+ else if (GET_MODE_SIZE (<MODE>mode) == 16)
+		return "vinsert<shuffletype>32x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, 0x0}";
+ else
+ gcc_unreachable ();
+ }
+ else
+ /* Reg -> reg move is always aligned. Just use wider move. */
+ switch (mode)
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "vmovaps\t{%g1, %g0|%g0, %g1}";
+ case MODE_V4DF:
+ case MODE_V2DF:
+		return "vmovapd\t{%g1, %g0|%g0, %g1}";
+ case MODE_OI:
+ case MODE_TI:
+ return "vmovdqu64\t{%g1, %g0|%g0, %g1}";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ switch (mode)
+ {
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
if (TARGET_AVX
@@ -467,6 +729,7 @@
else
return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V8DF:
case MODE_V4DF:
case MODE_V2DF:
if (TARGET_AVX
@@ -484,6 +747,12 @@
return "vmovdqu\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovdqu64\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa64\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
@@ -511,6 +780,62 @@
]
(const_string "<sseinsnmode>")))])
+(define_insn "avx512f_load<mode>_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
+ (vec_merge:VI48F_512
+ (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8DF:
+ case MODE_V16SF:
+ return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+ default:
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "<sseinsnmode>")])
+
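+;; In avx512f_load<mode>_mask above, operand 2 is the merge source: "0C"
+;; permits either the old register contents (merge-masking) or a zero
+;; vector, in which case %N2 is expected to print the {z} zero-masking
+;; modifier after the {%k3} mask printed by %{%3%}.
+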
+(define_insn "avx512f_blendm<mode>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_merge:VI48F_512
+ (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
+ (match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_store<mode>_mask"
+ [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
+ (vec_merge:VI48F_512
+ (match_operand:VI48F_512 1 "register_operand" "v")
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8DF:
+ case MODE_V16SF:
+ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ default:
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI
@@ -602,20 +927,21 @@
DONE;
})
-(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "register_operand" "=x")
- (unspec:VF
- [(match_operand:VF 1 "memory_operand" "m")]
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")]
UNSPEC_LOADU))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition>"
{
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
- return "%vmovups\t{%1, %0|%0, %1}";
+ return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
- return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+ return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -632,14 +958,15 @@
(const_string "<MODE>")))])
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "memory_operand" "=m")
- (unspec:VF
- [(match_operand:VF 1 "register_operand" "x")]
+ [(set (match_operand:VF_AVX512F 0 "memory_operand" "=m")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "register_operand" "v")]
UNSPEC_STOREU))]
"TARGET_SSE"
{
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
@@ -661,17 +988,47 @@
]
(const_string "<MODE>")))])
-(define_insn "<sse2>_loaddqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "register_operand" "=x")
- (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
- UNSPEC_LOADU))]
- "TARGET_SSE2"
+(define_insn "avx512f_storeu<ssemodesuffix>512_mask"
+ [(set (match_operand:VF_512 0 "memory_operand" "=m")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")]
+ UNSPEC_STOREU)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V16SF:
+ return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ default:
+ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+ [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
+ (unspec:VI_UNALIGNED_LOADSTORE
+ [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
+ UNSPEC_LOADU))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
switch (get_attr_mode (insn))
{
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ if (<MODE>mode == V8DImode)
+ return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ else
+ return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
return "%vmovdqu\t{%1, %0|%0, %1}";
}
@@ -694,10 +1051,11 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "<sse2>_storedqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "memory_operand" "=m")
- (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
- UNSPEC_STOREU))]
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+ [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
+ (unspec:VI_UNALIGNED_LOADSTORE
+ [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
+ UNSPEC_STOREU))]
"TARGET_SSE2"
{
switch (get_attr_mode (insn))
@@ -705,6 +1063,11 @@
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ if (<MODE>mode == V8DImode)
+ return "vmovdqu64\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqu32\t{%1, %0|%0, %1}";
default:
return "%vmovdqu\t{%1, %0|%0, %1}";
}
@@ -728,6 +1091,88 @@
]
(const_string "<sseinsnmode>")))])
+(define_insn "avx512f_storedqu<mode>_mask"
+ [(set (match_operand:VI48_512 0 "memory_operand" "=m")
+ (vec_merge:VI48_512
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "register_operand" "v")]
+ UNSPEC_STOREU)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
+ "TARGET_AVX512F"
+{
+ if (<MODE>mode == V8DImode)
+ return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ else
+ return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_moves<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:VF_128 3 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
+ (match_operand:VF_128 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_loads<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (vec_duplicate:VF_128
+ (match_operand:<ssescalarmode> 1 "memory_operand"))
+ (match_operand:VF_128 2 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "operands[4] = CONST0_RTX (<MODE>mode);")
+
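+;; The outer vec_merge against a zero vector (operand 4) encodes that a
+;; masked vmovs[sd] load clears every destination element above element
+;; 0, regardless of the mask.
+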
+(define_insn "*avx512f_loads<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (vec_duplicate:VF_128
+ (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
+ (match_operand:VF_128 2 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k"))
+ (match_operand:VF_128 4 "const0_operand")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_stores<mode>_mask"
+ [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+ (vec_select:<ssescalarmode>
+ (vec_merge:VF_128
+ (match_operand:VF_128 1 "register_operand" "v")
+ (vec_duplicate:VF_128
+ (match_dup 0))
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k"))
+ (parallel [(const_int 0)])))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse3>_lddqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "register_operand" "=x")
(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
@@ -760,9 +1205,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sse>_movnt<mode>"
- [(set (match_operand:VF 0 "memory_operand" "=m")
- (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
+ [(set (match_operand:VF_AVX512F 0 "memory_operand" "=m")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "register_operand" "v")]
+ UNSPEC_MOVNT))]
"TARGET_SSE"
"%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
@@ -771,7 +1217,7 @@
(define_insn "<sse2>_movnt<mode>"
[(set (match_operand:VI8 0 "memory_operand" "=m")
- (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
+ (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"%vmovntdq\t{%1, %0|%0, %1}"
@@ -792,9 +1238,9 @@
(define_mode_iterator STORENT_MODE
[(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
- (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
- (V8SF "TARGET_AVX") V4SF
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
(define_expand "storent<mode>"
[(set (match_operand:STORENT_MODE 0 "memory_operand")
@@ -810,17 +1256,17 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_expand "<code><mode>2"
- [(set (match_operand:VF 0 "register_operand")
- (absneg:VF
- (match_operand:VF 1 "register_operand")))]
+ [(set (match_operand:VF_AVX512F 0 "register_operand")
+ (absneg:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "register_operand")))]
"TARGET_SSE"
"ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn_and_split "*absneg<mode>2"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
- (match_operator:VF 3 "absneg_operator"
- [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
- (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,x,v,v")
+ (match_operator:VF_AVX512F 3 "absneg_operator"
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "0, xm, v, m")]))
+ (use (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm, 0, vm,v"))]
"TARGET_SSE"
"#"
"&& reload_completed"
@@ -854,96 +1300,99 @@
}
[(set_attr "isa" "noavx,noavx,avx,avx")])
-(define_expand "<plusminus_insn><mode>3"
- [(set (match_operand:VF 0 "register_operand")
- (plusminus:VF
- (match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand")
+ (plusminus:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<plusminus_insn><mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (plusminus:VF
- (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (plusminus:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand" "<comm>0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<plusminus_insn><mode>3"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(plusminus:VF_128
- (match_operand:VF_128 1 "register_operand" "0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:VF_128 1 "register_operand" "0,v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
-(define_expand "mul<mode>3"
- [(set (match_operand:VF 0 "register_operand")
- (mult:VF
- (match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+(define_expand "mul<mode>3<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand")
+ (mult:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mul<mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (mult:VF
- (match_operand:VF 1 "nonimmediate_operand" "%0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+(define_insn "*mul<mode>3<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (mult:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
mul<ssemodesuffix>\t{%2, %0|%0, %2}
- vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssemul")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmmul<mode>3"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
- (mult:VF_128
- (match_operand:VF_128 1 "register_operand" "0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
+ (multdiv:VF_128
+ (match_operand:VF_128 1 "register_operand" "0,v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
- mul<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
+ v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
- (set_attr "type" "ssemul")
- (set_attr "prefix" "orig,vex")
+ (set_attr "type" "sse<multdiv_mnemonic>")
+ (set_attr "prefix" "<mask_scalar_prefix>")
+ (set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "div<mode>3"
- [(set (match_operand:VF2 0 "register_operand")
- (div:VF2 (match_operand:VF2 1 "register_operand")
- (match_operand:VF2 2 "nonimmediate_operand")))]
+ [(set (match_operand:VF2_AVX512F 0 "register_operand")
+ (div:VF2_AVX512F
+ (match_operand:VF2_AVX512F 1 "register_operand")
+ (match_operand:VF2_AVX512F 2 "nonimmediate_operand")))]
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
(define_expand "div<mode>3"
- [(set (match_operand:VF1 0 "register_operand")
- (div:VF1 (match_operand:VF1 1 "register_operand")
- (match_operand:VF1 2 "nonimmediate_operand")))]
+ [(set (match_operand:VF1_AVX512F 0 "register_operand")
+ (div:VF1_AVX512F
+ (match_operand:VF1_AVX512F 1 "register_operand")
+ (match_operand:VF1_AVX512F 2 "nonimmediate_operand")))]
"TARGET_SSE"
{
ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
@@ -959,38 +1408,20 @@
}
})
-(define_insn "<sse>_div<mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (div:VF
- (match_operand:VF 1 "register_operand" "0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE"
+(define_insn "<sse>_div<mode>3<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (div:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "register_operand" "0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
div<ssemodesuffix>\t{%2, %0|%0, %2}
- vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssediv")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmdiv<mode>3"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
- (vec_merge:VF_128
- (div:VF_128
- (match_operand:VF_128 1 "register_operand" "0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE"
- "@
- div<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "ssediv")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "direct,double")
- (set_attr "mode" "<ssescalarmode>")])
-
(define_insn "<sse>_rcp<mode>2"
[(set (match_operand:VF1 0 "register_operand" "=x")
(unspec:VF1
@@ -1021,14 +1452,40 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
+(define_insn "<mask_codefor>rcp14<mode><mask_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RCP14))]
+ "TARGET_AVX512F"
+ "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<mask_scalar_codefor>srcp14<mode><mask_scalar_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+ UNSPEC_RCP14)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "sqrt<mode>2"
- [(set (match_operand:VF2 0 "register_operand")
- (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
+ [(set (match_operand:VF2_AVX512F 0 "register_operand")
+ (sqrt:VF2_AVX512F (match_operand:VF2_AVX512F 1 "nonimmediate_operand")))]
"TARGET_SSE2")
(define_expand "sqrt<mode>2"
- [(set (match_operand:VF1 0 "register_operand")
- (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
+ [(set (match_operand:VF1_AVX512F 0 "register_operand")
+ (sqrt:VF1_AVX512F (match_operand:VF1_AVX512F 1 "nonimmediate_operand")))]
"TARGET_SSE"
{
if (TARGET_SSE_MATH
@@ -1042,33 +1499,33 @@
}
})
-(define_insn "<sse>_sqrt<mode>2"
- [(set (match_operand:VF 0 "register_operand" "=x")
- (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
+(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+ (sqrt:VF_AVX512F (match_operand:VF_AVX512F 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmsqrt<mode>2"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(sqrt:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
(match_operand:VF_128 2 "register_operand" "0,x")
(const_int 1)))]
"TARGET_SSE"
"@
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
- vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
+ vsqrt<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "btver2_sse_attr" "sqrt")
- (set_attr "prefix" "orig,vex")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "rsqrt<mode>2"
@@ -1091,6 +1548,32 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RSQRT14))]
+ "TARGET_AVX512F"
+ "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<mask_scalar_codefor>rsqrt14<mode><mask_scalar_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+ UNSPEC_RSQRT14)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
@@ -1111,65 +1594,67 @@
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
-(define_expand "<code><mode>3"
- [(set (match_operand:VF 0 "register_operand")
- (smaxmin:VF
- (match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand")
+ (smaxmin:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
{
if (!flag_finite_math_only)
operands[1] = force_reg (<MODE>mode, operands[1]);
ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
-(define_insn "*<code><mode>3_finite"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (smaxmin:VF
- (match_operand:VF 1 "nonimmediate_operand" "%0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
+(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (smaxmin:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")))]
"TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
- v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "*<code><mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (smaxmin:VF
- (match_operand:VF 1 "register_operand" "0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE && !flag_finite_math_only"
+(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (smaxmin:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "register_operand" "0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")))]
+ "TARGET_SSE && !flag_finite_math_only
+ && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
- v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<code><mode>3"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(smaxmin:VF_128
- (match_operand:VF_128 1 "register_operand" "0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:VF_128 1 "register_operand" "0,v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
@@ -1476,6 +1961,15 @@
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
+(define_expand "reduc_splus_v8df"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V8DF 1 "register_operand")]
+ "TARGET_AVX512F"
+{
+ ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
+ DONE;
+})
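+
+;; ix86_expand_reduc works roughly by repeatedly folding the upper half
+;; of the vector onto the lower half with the given add, so the V8DF sum
+;; above takes three shuffle-and-vaddpd steps.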
+
(define_expand "reduc_splus_v4df"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V4DF 1 "register_operand")]
@@ -1498,6 +1992,15 @@
DONE;
})
+(define_expand "reduc_splus_v16sf"
+ [(match_operand:V16SF 0 "register_operand")
+ (match_operand:V16SF 1 "register_operand")]
+ "TARGET_AVX512F"
+{
+ ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
+ DONE;
+})
+
(define_expand "reduc_splus_v8sf"
[(match_operand:V8SF 0 "register_operand")
(match_operand:V8SF 1 "register_operand")]
@@ -1533,7 +2036,9 @@
[(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V4SF "TARGET_SSE")])
+ (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "reduc_<code>_<mode>"
[(smaxmin:REDUC_SMINMAX_MODE
@@ -1546,6 +2051,16 @@
})
(define_expand "reduc_<code>_<mode>"
+ [(umaxmin:VI48_512
+ (match_operand:VI48_512 0 "register_operand")
+ (match_operand:VI48_512 1 "register_operand"))]
+ "TARGET_AVX512F"
+{
+ ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_<code>_<mode>"
[(umaxmin:VI_256
(match_operand:VI_256 0 "register_operand")
(match_operand:VI_256 1 "register_operand"))]
@@ -1651,17 +2166,95 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "<sse>_comi"
+(define_mode_attr cmp_imm_predicate
+ [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
+ (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
+
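+;; The comparison immediate is a 5-bit predicate for the FP vcmpp[sd]
+;; forms but only 3 bits for the integer vpcmp[u][dq] forms (0=eq, 1=lt,
+;; 2=le, 3=false, 4=neq, 5=nlt, 6=nle, 7=true), hence the two predicates.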
+(define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512F && <round_saeonly_mode512bit_condition_op1>"
+ "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (and:<avx512fmaskmode>
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (and:<avx512fmaskmode>
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (and:<avx512fmaskmode>
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
+ (const_int 1))))]
+ "TARGET_AVX512F"
+ "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_maskcmp<mode>3"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
+ [(match_operand:VF_AVX512F 1 "register_operand" "v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "vm")]))]
+  "TARGET_AVX512F"
+ "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<sse>_comi<round_saeonly_name>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
(vec_select:MODEF
- (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (match_operand:<ssevecmode> 0 "register_operand" "v")
(parallel [(const_int 0)]))
(vec_select:MODEF
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
+ "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
(set_attr "prefix_rep" "0")
@@ -1671,17 +2264,17 @@
(const_string "0")))
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_ucomi"
+(define_insn "<sse>_ucomi<round_saeonly_name>"
[(set (reg:CCFPU FLAGS_REG)
(compare:CCFPU
(vec_select:MODEF
- (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (match_operand:<ssevecmode> 0 "register_operand" "v")
(parallel [(const_int 0)]))
(vec_select:MODEF
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
+ "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
(set_attr "prefix_rep" "0")
@@ -1691,6 +2284,23 @@
(const_string "0")))
(set_attr "mode" "<MODE>")])
+(define_expand "vcond<V_512:mode><VF_512:mode>"
+ [(set (match_operand:V_512 0 "register_operand")
+ (if_then_else:V_512
+ (match_operator 3 ""
+ [(match_operand:VF_512 4 "nonimmediate_operand")
+ (match_operand:VF_512 5 "nonimmediate_operand")])
+ (match_operand:V_512 1 "general_operand")
+ (match_operand:V_512 2 "general_operand")))]
+ "TARGET_AVX512F
+ && (GET_MODE_NUNITS (<V_512:MODE>mode)
+ == GET_MODE_NUNITS (<VF_512:MODE>mode))"
+{
+ bool ok = ix86_expand_fp_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcond<V_256:mode><VF_256:mode>"
[(set (match_operand:V_256 0 "register_operand")
(if_then_else:V_256
@@ -1732,11 +2342,11 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "<sse>_andnot<mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (and:VF
- (not:VF
- (match_operand:VF 1 "register_operand" "0,x"))
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (and:VF_AVX512F
+ (not:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "register_operand" "0,v"))
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_SSE"
{
static char buf[32];
@@ -1765,12 +2375,19 @@
gcc_unreachable ();
}
+  /* There is no 512-bit vandnp[sd] in avx512f.  Use vpandnq instead.  */
+ if (GET_MODE_SIZE (<MODE>mode) == 64)
+ {
+ suffix = "q";
+ ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ }
+
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -1789,11 +2406,19 @@
"TARGET_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_expand "<code><mode>3"
+ [(set (match_operand:VF_512 0 "register_operand")
+ (fpint_logic:VF_512
+ (match_operand:VF_512 1 "nonimmediate_operand")
+ (match_operand:VF_512 2 "nonimmediate_operand")))]
+ "TARGET_AVX512F"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
(define_insn "*<code><mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (any_logic:VF
- (match_operand:VF 1 "nonimmediate_operand" "%0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+ (any_logic:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
static char buf[32];
@@ -1822,12 +2447,19 @@
gcc_unreachable ();
}
+  /* There is no 512-bit v<logic>p[sd] in avx512f.  Use vp<logic>q instead.  */
+ if (GET_MODE_SIZE (<MODE>mode) == 64)
+ {
+ suffix = "q";
+ ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ }
+
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -1840,14 +2472,14 @@
(define_expand "copysign<mode>3"
[(set (match_dup 4)
- (and:VF
- (not:VF (match_dup 3))
- (match_operand:VF 1 "nonimmediate_operand")))
+ (and:VF_AVX512F
+ (not:VF_AVX512F (match_dup 3))
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand")))
(set (match_dup 5)
- (and:VF (match_dup 3)
- (match_operand:VF 2 "nonimmediate_operand")))
- (set (match_operand:VF 0 "register_operand")
- (ior:VF (match_dup 4) (match_dup 5)))]
+ (and:VF_AVX512F (match_dup 3)
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand")))
+ (set (match_operand:VF_AVX512F 0 "register_operand")
+ (ior:VF_AVX512F (match_dup 4) (match_dup 5)))]
"TARGET_SSE"
{
operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
@@ -2045,6 +2677,23 @@
]
(const_string "TI")))])
+;; There is no floating-point xor for V16SF and V8DF in avx512f,
+;; but we need one for negation.  Instead we use the integer versions
+;; of xor.  Maybe there is a better way to do this.
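+;; E.g. (xor:V16SF x signmask) for negation is emitted as vpxord, with
+;; <avx512flogicsuff> supplying the d/q element-size suffix.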
+
+(define_mode_attr avx512flogicsuff
+ [(V16SF "d") (V8DF "q")])
+
+(define_insn "avx512f_<logic><mode>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (fpint_logic:VF_512
+ (match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA floating point multiply/accumulate instructions. These include
@@ -2053,9 +2702,18 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The standard names for scalar FMA are only available with SSE math enabled.
-(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
- (DF "TARGET_SSE_MATH")
- V4SF V2DF V8SF V4DF])
+;; The AVX512F CPUID bit enables EVEX-encoded scalar and 512-bit fma
+;; independently of the FMA bit, so we enable fma for TARGET_AVX512F
+;; even when TARGET_FMA and TARGET_FMA4 are both false.
+(define_mode_iterator FMAMODEM
+ [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
+ (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
+ (V4SF "TARGET_FMA || TARGET_FMA4")
+ (V2DF "TARGET_FMA || TARGET_FMA4")
+ (V8SF "TARGET_FMA || TARGET_FMA4")
+ (V4DF "TARGET_FMA || TARGET_FMA4")
+ (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
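+
+;; Hence with -mavx512f alone the scalar and 512-bit variants of the
+;; expanders below are usable (the per-mode conditions above do the
+;; gating), which is why their insn conditions become empty.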
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2063,7 +2721,7 @@
(match_operand:FMAMODEM 1 "nonimmediate_operand")
(match_operand:FMAMODEM 2 "nonimmediate_operand")
(match_operand:FMAMODEM 3 "nonimmediate_operand")))]
- "TARGET_FMA || TARGET_FMA4")
+ "")
(define_expand "fms<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2071,7 +2729,7 @@
(match_operand:FMAMODEM 1 "nonimmediate_operand")
(match_operand:FMAMODEM 2 "nonimmediate_operand")
(neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
- "TARGET_FMA || TARGET_FMA4")
+ "")
(define_expand "fnma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2079,7 +2737,7 @@
(neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
(match_operand:FMAMODEM 2 "nonimmediate_operand")
(match_operand:FMAMODEM 3 "nonimmediate_operand")))]
- "TARGET_FMA || TARGET_FMA4")
+ "")
(define_expand "fnms<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2087,10 +2745,17 @@
(neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
(match_operand:FMAMODEM 2 "nonimmediate_operand")
(neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
- "TARGET_FMA || TARGET_FMA4")
+ "")
;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (V4SF "TARGET_FMA || TARGET_FMA4")
+ (V2DF "TARGET_FMA || TARGET_FMA4")
+ (V8SF "TARGET_FMA || TARGET_FMA4")
+ (V4DF "TARGET_FMA || TARGET_FMA4")
+ (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
@@ -2098,77 +2763,227 @@
(match_operand:FMAMODE 1 "nonimmediate_operand")
(match_operand:FMAMODE 2 "nonimmediate_operand")
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "TARGET_FMA || TARGET_FMA4")
+ "")
+
+(define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "<round_expand_predicate>")
+ (match_operand:VF_512 2 "<round_expand_predicate>")
+ (match_operand:VF_512 3 "<round_expand_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+ DONE;
+})
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
+(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
- "TARGET_FMA || TARGET_FMA4"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
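The _mask variant merges the FMA result with operand 1, which is why only the 132 and 213 alternatives appear: both keep operand 1 tied to the destination, so masked-off lanes retain their old value. At the intrinsic level (again assuming the standard mapping, which lives outside this hunk):

  #include <immintrin.h>

  /* Lanes whose mask bit is clear keep the value from a (operand 1).  */
  __m512 mask_fma (__m512 a, __mmask16 k, __m512 b, __m512 c)
  {
    return _mm512_mask_fmadd_ps (a, k, b, c);
  }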
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
+(define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
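The _mask3 variant instead merges with operand 3, so only the 231 form qualifies: operand 3 is the one tied to the destination. A corresponding sketch, hedged as above:

  #include <immintrin.h>

  /* Lanes whose mask bit is clear keep the value from c (operand 3).  */
  __m512 mask3_fma (__m512 a, __m512 b, __m512 c, __mmask16 k)
  {
    return _mm512_mask3_fmadd_ps (a, b, c, k);
  }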
+(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
- "TARGET_FMA || TARGET_FMA4"
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
+(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
- "TARGET_FMA || TARGET_FMA4"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
+(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "v"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
- "TARGET_FMA || TARGET_FMA4"
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "v"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
@@ -2184,55 +2999,154 @@
;; But this doesn't seem useful in practice.
(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
+ [(set (match_operand:VF_AVX512F 0 "register_operand")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand")
+ (match_operand:VF_AVX512F 3 "nonimmediate_operand")]
UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
+ "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+
+(define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "<round_expand_predicate>")
+ (match_operand:VF_512 2 "<round_expand_predicate>")
+ (match_operand:VF_512 3 "<round_expand_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+ DONE;
+})
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
+(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:VF_AVX512F 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")]
UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4"
+ "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
+(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0,0")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")]
+ UNSPEC_FMADDSUB)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0")]
+ UNSPEC_FMADDSUB)
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
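UNSPEC_FMADDSUB alternates the sign of the addend per lane, even-indexed elements computing a*b - c and odd-indexed elements a*b + c, which is why it cannot be expressed with plain fma RTL. For instance (same caveat on the intrinsic mapping):

  #include <immintrin.h>

  /* result[0] = a[0]*b[0] - c[0], result[1] = a[1]*b[1] + c[1], ...  */
  __m512d fmaddsub (__m512d a, __m512d b, __m512d c)
  {
    return _mm512_fmaddsub_pd (a, b, c);
  }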
+(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (neg:VF_AVX512F
+ (match_operand:VF_AVX512F 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))]
UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4"
+ "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0,0")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))]
+ UNSPEC_FMADDSUB)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0"))]
+ UNSPEC_FMADDSUB)
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA3 floating point scalar intrinsics. These merge the result with
;; high-order elements from the destination register.
-(define_expand "fmai_vmfmadd_<mode>"
+(define_expand "fmai_vmfmadd_<mode>_maskz<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup <round_opnum>)
+ (match_operand:QI 4 "register_operand"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "operands[<round_opnum>] = CONST0_RTX (<MODE>mode);")
+
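For the scalar forms, the mask applies only to element 0; the outer vec_merge with (const_int 1) then re-merges that element into operand 1, so elements 1..3 always come from operand 1. The Intel-documented intrinsic with these semantics is _mm_mask_fmadd_ss; how the builtin is actually exposed is outside this hunk, so treat this as a sketch:

  #include <immintrin.h>

  /* If bit 0 of k is clear, element 0 of the result is a[0];
     elements 1..3 are always taken from a.  */
  __m128 scalar_mask_fma (__m128 a, __mmask8 k, __m128 b, __m128 c)
  {
    return _mm_mask_fmadd_ss (a, k, b, c);
  }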
+(define_expand "fmai_vmfmadd_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(fma:VF_128
@@ -2243,71 +3157,303 @@
(const_int 1)))]
"TARGET_FMA")
-(define_insn "*fmai_fmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "fmai_vmfmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>, v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_128 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmadd_<mode>_maskz<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_operand:VF_128 4 "const0_operand")
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %3, %2<round_op6>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmadd_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
- "TARGET_FMA"
+ "TARGET_FMA || TARGET_AVX512F"
"@
- vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "*fmai_fmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_maskz<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_operand:VF_128 4 "const0_operand")
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_128 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_maskz<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_operand:VF_128 4 "const0_operand")
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_maskz<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_operand:VF_128 4 "const0_operand")
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(const_int 1)))]
- "TARGET_FMA"
+ "TARGET_FMA || TARGET_AVX512F"
"@
- vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fnmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "*fmai_fnmadd_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
- "TARGET_FMA"
+ "TARGET_FMA || TARGET_AVX512F"
"@
- vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fnmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+(define_insn "*fmai_fnmsub_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(const_int 1)))]
- "TARGET_FMA"
+ "TARGET_FMA || TARGET_AVX512F"
"@
- vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
@@ -2428,39 +3574,39 @@
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtsi2ss"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+(define_insn "sse_cvtsi2ss<round_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
- (match_operand:V4SF 1 "register_operand" "0,0,x")
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE"
"@
cvtsi2ss\t{%2, %0|%0, %2}
cvtsi2ss\t{%2, %0|%0, %2}
- vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtsi2ssq"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+(define_insn "sse_cvtsi2ssq<round_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
- (match_operand:V4SF 1 "register_operand" "0,0,x")
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE && TARGET_64BIT"
"@
cvtsi2ssq\t{%2, %0|%0, %2}
cvtsi2ssq\t{%2, %0|%0, %2}
- vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double,*")
@@ -2469,18 +3615,18 @@
(set_attr "btver2_decode" "double,double,double")
(set_attr "length_vex" "*,*,4")
(set_attr "prefix_rex" "1,1,*")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtss2si"
+(define_insn "sse_cvtss2si<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE"
- "%vcvtss2si\t{%1, %0|%0, %k1}"
+ "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -2490,7 +3636,7 @@
(define_insn "sse_cvtss2si_2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE"
"%vcvtss2si\t{%1, %0|%0, %k1}"
@@ -2502,15 +3648,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse_cvtss2siq"
+(define_insn "sse_cvtss2siq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE && TARGET_64BIT"
- "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
+ "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -2520,7 +3666,7 @@
(define_insn "sse_cvtss2siq_2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE && TARGET_64BIT"
"%vcvtss2si{q}\t{%1, %0|%0, %k1}"
@@ -2532,14 +3678,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "sse_cvttss2si"
+(define_insn "sse_cvttss2si<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE"
- "%vcvttss2si\t{%1, %0|%0, %k1}"
+ "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -2548,14 +3694,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse_cvttss2siq"
+(define_insn "sse_cvttss2siq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(fix:DI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_SSE && TARGET_64BIT"
- "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
+ "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -2564,18 +3710,56 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "float<sseintvecmodelower><mode>2"
- [(set (match_operand:VF1 0 "register_operand" "=x")
- (float:VF1
- (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "%vcvtdq2ps\t{%1, %0|%0, %1}"
+(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_duplicate:VF_128
+ (unsigned_float:<ssescalarmode>
+ (match_operand:SI 2 "nonimmediate_operand" "<round_constraint3>")))
+ (match_operand:VF_128 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F && <round_modev4sf_condition>"
+ "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_duplicate:VF_128
+ (unsigned_float:<ssescalarmode>
+ (match_operand:DI 2 "nonimmediate_operand" "<round_constraint3>")))
+ (match_operand:VF_128 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F && TARGET_64BIT"
+ "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
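vcvtusi2ss/vcvtusi2sd are new EVEX-only instructions; SSE2/AVX provided only the signed cvtsi2ss/cvtsi2sd forms, so an unsigned conversion previously needed a multi-instruction sequence. A likely user, per the usual AVX-512F intrinsic set:

  #include <immintrin.h>

  /* Convert an unsigned 32-bit integer into element 0 of a.  */
  __m128 cvt_u32 (__m128 a, unsigned int x)
  {
    return _mm_cvtu32_ss (a, x);
  }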
+(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF1_AVX512F 0 "register_operand" "=v")
+ (float:VF1_AVX512F
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "ufloatv16siv16sf2<mask_name><round_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (unsigned_float:V16SF
+ (match_operand:V16SI 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX512F"
+ "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
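vcvtudq2ps is the packed unsigned counterpart, likewise without an SSE/AVX equivalent. Sketch of usage, assuming the standard _mm512_cvtepu32_ps mapping:

  #include <immintrin.h>

  __m512 u32_to_float (__m512i u)
  {
    return _mm512_cvtepu32_ps (u);
  }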
(define_expand "floatuns<sseintvecmodelower><mode>2"
- [(match_operand:VF1 0 "register_operand")
+ [(match_operand:VF1_AVX512F 0 "register_operand")
(match_operand:<sseintvecmode> 1 "register_operand")]
"TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
@@ -2583,20 +3767,15 @@
DONE;
})
-(define_insn "avx_cvtps2dq256"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
- UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX"
- "vcvtps2dq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
+;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
+(define_mode_attr sf2simodelower
+ [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
-(define_insn "sse2_cvtps2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
- UNSPEC_FIX_NOTRUNC))]
+(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
+ [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
+ (unspec:VI4_AVX
+ [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
+ UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
"%vcvtps2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
@@ -2606,7 +3785,39 @@
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (any_fix:V16SI
+ (match_operand:V16SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512F"
+ "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
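The any_fix/<fixsuffix> iterator stamps out both the signed (vcvttps2dq) and unsigned (vcvttps2udq) truncating conversions from this one template. Both directions at the intrinsic level, hedged as before:

  #include <immintrin.h>

  __m512i trunc_s32 (__m512 x) { return _mm512_cvttps_epi32 (x); }
  __m512i trunc_u32 (__m512 x) { return _mm512_cvttps_epu32 (x); }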
(define_insn "fix_truncv8sfv8si2"
[(set (match_operand:V8SI 0 "register_operand" "=x")
@@ -2639,7 +3850,7 @@
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
[(match_operand:<sseintvecmode> 0 "register_operand")
- (match_operand:VF1 1 "register_operand")]
+ (match_operand:VF1_AVX512F 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp[3];
@@ -2711,18 +3922,18 @@
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "DF")])
-(define_insn "sse2_cvtsi2sdq"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+(define_insn "sse2_cvtsi2sdq<round_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(vec_duplicate:V2DF
- (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
- (match_operand:V2DF 1 "register_operand" "0,0,x")
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
+ (match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2 && TARGET_64BIT"
"@
cvtsi2sdq\t{%2, %0|%0, %2}
cvtsi2sdq\t{%2, %0|%0, %2}
- vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,direct,*")
@@ -2730,18 +3941,118 @@
(set_attr "bdver1_decode" "double,direct,*")
(set_attr "length_vex" "*,*,4")
(set_attr "prefix_rex" "1,1,*")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "DF")])
-(define_insn "sse2_cvtsd2si"
+(define_insn "avx512f_vcvtss2usi<round_name>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvtss2usiq<round_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F && TARGET_64BIT"
+ "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "DI")])
+
+(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unsigned_fix:SI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F"
+ "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unsigned_fix:DI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F && TARGET_64BIT"
+ "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "DI")])
+
+(define_insn "avx512f_vcvtsd2usi<round_name>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvtsd2usiq<round_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F && TARGET_64BIT"
+ "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "DI")])
+
+(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unsigned_fix:SI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F"
+ "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unsigned_fix:DI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F && TARGET_64BIT"
+ "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "DI")])
+
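These eight patterns cover the scalar unsigned conversions: vcvt{ss,sd}2usi rounds under the current (or an explicitly encoded) rounding mode via UNSPEC_UNSIGNED_FIX_NOTRUNC, while vcvtt{ss,sd}2usi truncates and so maps onto plain unsigned_fix RTL. For example, assuming the usual intrinsic names:

  #include <immintrin.h>

  unsigned int sd_trunc (__m128d x) { return _mm_cvttsd_u32 (x); }  /* truncating */
  unsigned int sd_round (__m128d x) { return _mm_cvtsd_u32 (x); }   /* current rounding mode */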
+(define_insn "sse2_cvtsd2si<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
- "%vcvtsd2si\t{%1, %0|%0, %q1}"
+ "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -2752,7 +4063,7 @@
(define_insn "sse2_cvtsd2si_2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
"%vcvtsd2si\t{%1, %0|%0, %q1}"
@@ -2764,15 +4075,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse2_cvtsd2siq"
+(define_insn "sse2_cvtsd2siq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
- "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
+ "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -2782,7 +4093,7 @@
(define_insn "sse2_cvtsd2siq_2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
"%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
@@ -2794,14 +4105,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "sse2_cvttsd2si"
+(define_insn "sse2_cvttsd2si<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
- "%vcvttsd2si\t{%1, %0|%0, %q1}"
+ "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -2811,14 +4122,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse2_cvttsd2siq"
+(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(fix:DI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE2 && TARGET_64BIT"
- "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
+ "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -2827,14 +4138,45 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "floatv4siv4df2"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+;; For float<si2dfmodelower><mode>2 insn pattern
+(define_mode_attr si2dfmode
+ [(V8DF "V8SI") (V4DF "V4SI")])
+(define_mode_attr si2dfmodelower
+ [(V8DF "v8si") (V4DF "v4si")])
+
+(define_insn "float<si2dfmodelower><mode>2<mask_name>"
+ [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
+ (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ufloatv8siv8df<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand" "=v")
+ (unsigned_float:V8DF
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
+
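vcvtudq2pd widens eight unsigned 32-bit integers to eight doubles, again with no single-instruction equivalent before AVX-512F. Assuming the usual intrinsic name:

  #include <immintrin.h>

  __m512d u32_to_double (__m256i u)
  {
    return _mm512_cvtepu32_pd (u);
  }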
+(define_insn "avx512f_cvtdq2pd512_2"
+ [(set (match_operand:V8DF 0 "register_operand" "=v")
+ (float:V8DF
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
"TARGET_AVX"
- "vcvtdq2pd\t{%1, %0|%0, %1}"
+ "vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
(define_insn "avx_cvtdq2pd256_2"
[(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -2861,6 +4203,17 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
+(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (unspec:V8SI
+ [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "OI")])
+
(define_insn "avx_cvtpd2dq256"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
@@ -2924,6 +4277,27 @@
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
+(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (unspec:V8SI
+ [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512F"
+ "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "OI")])
+
+(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (any_fix:V8SI
+ (match_operand:V8DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512F"
+ "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "OI")])
+
(define_insn "fix_truncv4dfv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
@@ -2980,51 +4354,61 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "sse2_cvtsd2ss"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+(define_insn "sse2_cvtsd2ss<mask_scalar_name><round_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
- (match_operand:V4SF 1 "register_operand" "0,0,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2"
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
+ vcvtsd2ss\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "SF")])
-(define_insn "sse2_cvtss2sd"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+(define_insn "sse2_cvtss2sd<mask_scalar_name><round_saeonly_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
(parallel [(const_int 0) (const_int 1)])))
- (match_operand:V2DF 1 "register_operand" "0,0,x")
+ (match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2"
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
+ vcvtss2sd\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %k2<round_saeonly_mask_scalar_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "athlon_decode" "direct,direct,*")
(set_attr "bdver1_decode" "direct,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "mode" "DF")])
+(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
+ (float_truncate:V8SF
+ (match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX512F"
+ "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "avx_cvtpd2ps256"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(float_truncate:V4SF
@@ -3066,15 +4450,19 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
-(define_insn "avx_cvtps2pd256"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (float_extend:V4DF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvtps2pd\t{%1, %0|%0, %1}"
+;; Mode attribute giving the source mode for the <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern.
+(define_mode_attr sf2dfmode
+ [(V8DF "V8SF") (V4DF "V4SF")])
+
+(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
+ (float_extend:VF2_512_256
+ (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
+ "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
+ "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
(define_insn "*avx_cvtps2pd256_2"
[(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -3089,6 +4477,21 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+(define_insn "vec_unpacks_lo_v16sf"
+ [(set (match_operand:V8DF 0 "register_operand" "=v")
+ (float_extend:V8DF
+ (vec_select:V8SF
+ (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512F"
+ "vcvtps2pd\t{%t1, %0|%0, %t1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
+
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float_extend:V2DF
@@ -3133,6 +4536,20 @@
"TARGET_AVX"
"operands[2] = gen_reg_rtx (V4SFmode);")
+(define_expand "vec_unpacks_hi_v16sf"
+ [(set (match_dup 2)
+ (vec_select:V8SF
+ (match_operand:V16SF 1 "nonimmediate_operand")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))
+ (set (match_operand:V8DF 0 "register_operand")
+ (float_extend:V8DF
+ (match_dup 2)))]
+"TARGET_AVX512F"
+"operands[2] = gen_reg_rtx (V8SFmode);")
+
(define_expand "vec_unpacks_lo_v4sf"
[(set (match_operand:V2DF 0 "register_operand")
(float_extend:V2DF
@@ -3151,11 +4568,12 @@
"TARGET_AVX")
(define_mode_attr sseunpackfltmode
- [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
+ [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
+ (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
(define_expand "vec_unpacks_float_hi_<mode>"
[(match_operand:<sseunpackfltmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")]
+ (match_operand:VI2_AVX512F 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3168,7 +4586,7 @@
(define_expand "vec_unpacks_float_lo_<mode>"
[(match_operand:<sseunpackfltmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")]
+ (match_operand:VI2_AVX512F 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3181,7 +4599,7 @@
(define_expand "vec_unpacku_float_hi_<mode>"
[(match_operand:<sseunpackfltmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")]
+ (match_operand:VI2_AVX512F 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3194,7 +4612,7 @@
(define_expand "vec_unpacku_float_lo_<mode>"
[(match_operand:<sseunpackfltmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")]
+ (match_operand:VI2_AVX512F 1 "register_operand")]
"TARGET_SSE2"
{
rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3248,6 +4666,31 @@
(const_int 2) (const_int 3)]))))]
"TARGET_AVX")
+(define_expand "vec_unpacks_float_hi_v16si"
+ [(set (match_dup 2)
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))
+ (set (match_operand:V8DF 0 "register_operand")
+ (float:V8DF
+ (match_dup 2)))]
+ "TARGET_AVX512F"
+ "operands[2] = gen_reg_rtx (V8SImode);")
+
+(define_expand "vec_unpacks_float_lo_v16si"
+ [(set (match_operand:V8DF 0 "register_operand")
+ (float:V8DF
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512F")
+
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
(vec_select:V4SI
@@ -3340,6 +4783,32 @@
DONE;
})
+(define_expand "vec_unpacku_float_hi_v16si"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V16SI 1 "register_operand")]
+ "TARGET_AVX512F"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx k, x, tmp[4];
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
+ tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
+ tmp[2] = gen_reg_rtx (V8DFmode);
+ tmp[3] = gen_reg_rtx (V8SImode);
+ k = gen_reg_rtx (QImode);
+
+ emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
+ emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
+ emit_insn (gen_rtx_SET (VOIDmode, k,
+ gen_rtx_LT (QImode, tmp[2], tmp[0])));
+ emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
+ emit_move_insn (operands[0], tmp[2]);
+ DONE;
+})
+
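The expander above open-codes an unsigned-to-double conversion: the high
eight elements are converted as if signed (vcvtdq2pd), and 2^32 is then
added under a mask wherever the signed reading came out negative.  A
scalar C sketch of the trick (the helper name is illustrative only):

#include <stdint.h>

/* Model of the unsigned-to-double idiom used by the expander:
   convert as signed, then correct negative results by 2^32.  */
static double
u32_to_double (uint32_t u)
{
  double d = (double) (int32_t) u;   /* signed convert (vcvtdq2pd) */
  if (d < 0.0)                       /* mask k = (d < 0)           */
    d += 4294967296.0;               /* masked add of 2^32         */
  return d;
}
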
(define_expand "vec_unpacku_float_lo_v8si"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")]
@@ -3365,21 +4834,45 @@
DONE;
})
-(define_expand "vec_pack_trunc_v4df"
+(define_expand "vec_unpacku_float_lo_v16si"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V16SI 1 "nonimmediate_operand")]
+ "TARGET_AVX512F"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx k, x, tmp[3];
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
+ tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
+ tmp[2] = gen_reg_rtx (V8DFmode);
+ k = gen_reg_rtx (QImode);
+
+ emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
+ emit_insn (gen_rtx_SET (VOIDmode, k,
+ gen_rtx_LT (QImode, tmp[2], tmp[0])));
+ emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
+ emit_move_insn (operands[0], tmp[2]);
+ DONE;
+})
+
+(define_expand "vec_pack_trunc_<mode>"
[(set (match_dup 3)
- (float_truncate:V4SF
- (match_operand:V4DF 1 "nonimmediate_operand")))
+ (float_truncate:<sf2dfmode>
+ (match_operand:VF2_512_256 1 "nonimmediate_operand")))
(set (match_dup 4)
- (float_truncate:V4SF
- (match_operand:V4DF 2 "nonimmediate_operand")))
- (set (match_operand:V8SF 0 "register_operand")
- (vec_concat:V8SF
+ (float_truncate:<sf2dfmode>
+ (match_operand:VF2_512_256 2 "nonimmediate_operand")))
+ (set (match_operand:<ssePSmode> 0 "register_operand")
+ (vec_concat:<ssePSmode>
(match_dup 3)
(match_dup 4)))]
"TARGET_AVX"
{
- operands[3] = gen_reg_rtx (V4SFmode);
- operands[4] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (<sf2dfmode>mode);
+ operands[4] = gen_reg_rtx (<sf2dfmode>mode);
})
(define_expand "vec_pack_trunc_v2df"
@@ -3410,6 +4903,23 @@
DONE;
})
+(define_expand "vec_pack_sfix_trunc_v8df"
+ [(match_operand:V16SI 0 "register_operand")
+ (match_operand:V8DF 1 "nonimmediate_operand")
+ (match_operand:V8DF 2 "nonimmediate_operand")]
+ "TARGET_AVX512F"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V8SImode);
+ r2 = gen_reg_rtx (V8SImode);
+
+ emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
+ emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
+ emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
+ DONE;
+})
+
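Elementwise, the pack above is just a truncating cast on each input.  A
minimal C model, hedged to in-range values (for out-of-range inputs C
leaves the conversion undefined, while vcvttpd2dq returns the integer
indefinite 0x80000000):

#include <stdint.h>

/* Model of vec_pack_sfix_trunc_v8df for in-range inputs: two V8DF
   sources truncated and concatenated into one V16SI result.  */
static void
pack_sfix_trunc (int32_t out[16], const double a[8], const double b[8])
{
  for (int i = 0; i < 8; i++)
    {
      out[i]     = (int32_t) a[i];   /* fix_truncv8dfv8si2 on op 1 */
      out[i + 8] = (int32_t) b[i];   /* fix_truncv8dfv8si2 on op 2 */
    }
}
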
(define_expand "vec_pack_sfix_trunc_v4df"
[(match_operand:V8SI 0 "register_operand")
(match_operand:V4DF 1 "nonimmediate_operand")
@@ -3459,12 +4969,12 @@
})
(define_mode_attr ssepackfltmode
- [(V4DF "V8SI") (V2DF "V4SI")])
+ [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
(define_expand "vec_pack_ufix_trunc_<mode>"
[(match_operand:<ssepackfltmode> 0 "register_operand")
- (match_operand:VF2 1 "register_operand")
- (match_operand:VF2 2 "register_operand")]
+ (match_operand:VF2_AVX512F 1 "register_operand")
+ (match_operand:VF2_AVX512F 2 "register_operand")]
"TARGET_SSE2"
{
rtx tmp[7];
@@ -3635,6 +5145,26 @@
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
+(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (vec_select:V16SF
+ (vec_concat:V32SF
+ (match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX512F"
+ "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhps256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -3703,6 +5233,26 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
+(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (vec_select:V16SF
+ (vec_concat:V32SF
+ (match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)])))]
+ "TARGET_AVX512F"
+ "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpcklps256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -3806,6 +5356,26 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (vec_select:V16SF
+ (vec_concat:V32SF
+ (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+ (match_dup 1))
+ (parallel [(const_int 1) (const_int 1)
+ (const_int 3) (const_int 3)
+ (const_int 5) (const_int 5)
+ (const_int 7) (const_int 7)
+ (const_int 9) (const_int 9)
+ (const_int 11) (const_int 11)
+ (const_int 13) (const_int 13)
+ (const_int 15) (const_int 15)])))]
+ "TARGET_AVX512F"
+ "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
(define_insn "avx_movsldup256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
@@ -3839,6 +5409,26 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (vec_select:V16SF
+ (vec_concat:V32SF
+ (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 0)
+ (const_int 2) (const_int 2)
+ (const_int 4) (const_int 4)
+ (const_int 6) (const_int 6)
+ (const_int 8) (const_int 8)
+ (const_int 10) (const_int 10)
+ (const_int 12) (const_int 12)
+ (const_int 14) (const_int 14)])))]
+ "TARGET_AVX512F"
+ "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
(define_expand "avx_shufps256"
[(match_operand:V8SF 0 "register_operand")
(match_operand:V8SF 1 "register_operand")
@@ -4368,6 +5958,218 @@
operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
+(define_expand "avx512f_vextract<shuffletype>32x4_mask"
+ [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_3_operand")
+ (match_operand:<ssequartermode> 3 "nonimmediate_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssequartermode>mode, operands[0]);
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
+ GEN_INT (3), operands[3], operands[4]));
+ break;
+ case 1:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
+ GEN_INT (7), operands[3], operands[4]));
+ break;
+ case 2:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
+ GEN_INT (11), operands[3], operands[4]));
+ break;
+ case 3:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
+ GEN_INT (15), operands[3], operands[4]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
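The four switch cases above differ only in the base element index, which
is always four times the immediate.  A C sketch of the same selection
(illustrative helper, not a drop-in replacement for the expander):

#include <stdint.h>

/* Model of the imm decode: chunk i of a 16-element vector covers
   elements 4*i .. 4*i + 3.  */
static void
extract_32x4 (int32_t out[4], const int32_t src[16], int imm /* 0..3 */)
{
  int base = imm * 4;
  for (int j = 0; j < 4; j++)
    out[j] = src[base + j];
}
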
+(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
+ [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+ (vec_merge:<ssequartermode>
+ (vec_select:<ssequartermode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_15_operand")
+ (match_operand 3 "const_0_to_15_operand")
+ (match_operand 4 "const_0_to_15_operand")
+ (match_operand 5 "const_0_to_15_operand")]))
+ (match_operand:<ssequartermode> 6 "memory_operand" "0")
+ (match_operand:QI 7 "register_operand" "k")))]
+ "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)
+ && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1)
+ && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
+ return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssequartermode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_15_operand")
+ (match_operand 3 "const_0_to_15_operand")
+ (match_operand 4 "const_0_to_15_operand")
+ (match_operand 5 "const_0_to_15_operand")])))]
+ "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)
+ && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1)
+ && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
+ return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_vextract<shuffletype>64x4_mask"
+ [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_1_operand")
+ (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx);
+
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
+
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ insn = gen_vec_extract_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_extract_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
+ else
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_lo_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512F"
+"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ if (<mask_applied>)
+ return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX512F"
+ "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx_vextractf128<mode>"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(match_operand:V_256 1 "register_operand")
@@ -4393,6 +6195,45 @@
})
(define_insn_and_split "vec_extract_lo_<mode>"
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
+ else
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX512F"
+ "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn_and_split "vec_extract_lo_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
(vec_select:<ssehalfvecmode>
(match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
@@ -4454,6 +6295,53 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "vec_extract_lo_v32hi"
+ [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
+ (vec_select:V16HI
+ (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (V16HImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (V16HImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_v32hi"
+ [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
+ (vec_select:V16HI
+ (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX512F"
+ "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn_and_split "vec_extract_lo_v16hi"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(vec_select:V8HI
@@ -4490,6 +6378,69 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn_and_split "vec_extract_lo_v64qi"
+ [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
+ (vec_select:V32QI
+ (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (V32QImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (V32QImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_v64qi"
+ [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
+ (vec_select:V32QI
+ (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
+ (parallel [(const_int 32) (const_int 33)
+ (const_int 34) (const_int 35)
+ (const_int 36) (const_int 37)
+ (const_int 38) (const_int 39)
+ (const_int 40) (const_int 41)
+ (const_int 42) (const_int 43)
+ (const_int 44) (const_int 45)
+ (const_int 46) (const_int 47)
+ (const_int 48) (const_int 49)
+ (const_int 50) (const_int 51)
+ (const_int 52) (const_int 53)
+ (const_int 54) (const_int 55)
+ (const_int 56) (const_int 57)
+ (const_int 58) (const_int 59)
+ (const_int 60) (const_int 61)
+ (const_int 62) (const_int 63)])))]
+ "TARGET_AVX512F"
+ "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn_and_split "vec_extract_lo_v32qi"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(vec_select:V16QI
@@ -4538,10 +6489,10 @@
(define_mode_iterator VEC_EXTRACT_MODE
[(V32QI "TARGET_AVX") V16QI
(V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
- (V8SF "TARGET_AVX") V4SF
- (V4DF "TARGET_AVX") V2DF])
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
(define_expand "vec_extract<mode>"
[(match_operand:<ssescalarmode> 0 "register_operand")
@@ -4560,6 +6511,22 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand" "=v")
+ (vec_select:V8DF
+ (vec_concat:V16DF
+ (match_operand:V8DF 1 "nonimmediate_operand" "v")
+ (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX512F"
+ "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
+
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhpd256"
[(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -4640,6 +6607,48 @@
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
+(define_expand "avx512f_movddup512<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand")
+ (vec_select:V8DF
+ (vec_concat:V16DF
+ (match_operand:V8DF 1 "nonimmediate_operand")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "TARGET_AVX512F")
+
+(define_expand "avx512f_unpcklpd512<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand")
+ (vec_select:V8DF
+ (vec_concat:V16DF
+ (match_operand:V8DF 1 "register_operand")
+ (match_operand:V8DF 2 "nonimmediate_operand"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "TARGET_AVX512F")
+
+(define_insn "*avx512f_unpcklpd512<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand" "=v,v")
+ (vec_select:V8DF
+ (vec_concat:V16DF
+ (match_operand:V8DF 1 "nonimmediate_operand" "v,vm")
+ (match_operand:V8DF 2 "nonimmediate_operand" "vm,1"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "TARGET_AVX512F"
+ "@
+ vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
+ vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
+
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
[(set (match_operand:V4DF 0 "register_operand")
@@ -4772,6 +6781,375 @@
operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
+(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_SCALEF)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "%vscalef<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_mask_scalar_op3>}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_scalef<mode><mask_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_SCALEF))]
+ "TARGET_AVX512F"
+ "%vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
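UNSPEC_SCALEF hides the vscalefps/vscalefpd semantics: the first operand
is scaled by two raised to the floor of the second.  A rough C model
(NaN/infinity/denormal special cases omitted, and the exponent is
assumed to fit in an int):

#include <math.h>

/* Approximate model of UNSPEC_SCALEF: x * 2^floor(y).  */
static double
scalef (double x, double y)
{
  return ldexp (x, (int) floor (y));
}
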
+(define_expand "avx512f_vternlog<mode>_maskz"
+ [(match_operand:VI48_512 0 "register_operand")
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:VI48_512 2 "register_operand")
+ (match_operand:VI48_512 3 "nonimmediate_operand")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_vternlog<mode><sd_maskz_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "register_operand" "0")
+ (match_operand:VI48_512 2 "register_operand" "v")
+ (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG))]
+ "TARGET_AVX512F"
+ "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_vternlog<mode>_mask"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (vec_merge:VI48_512
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "register_operand" "0")
+ (match_operand:VI48_512 2 "register_operand" "v")
+ (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
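The vpternlog immediate is a three-input truth table: for every bit
position, the bits of the three sources form an index into imm8, and the
indexed bit of imm8 becomes the result bit.  A C sketch of that
semantics (helper name illustrative):

#include <stdint.h>

/* Bitwise model of UNSPEC_VTERNLOG on one 64-bit element.  */
static uint64_t
ternlog (uint64_t a, uint64_t b, uint64_t c, uint8_t imm)
{
  uint64_t r = 0;
  for (int j = 0; j < 64; j++)
    {
      unsigned idx = (((a >> j) & 1) << 2)   /* operand 1 supplies */
                   | (((b >> j) & 1) << 1)   /* the high index bit */
                   |  ((c >> j) & 1);
      r |= (uint64_t) ((imm >> idx) & 1) << j;
    }
  return r;
}

For instance, imm8 0x96 implements the three-way XOR a ^ b ^ c.
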
+(define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>")]
+ UNSPEC_GETEXP))]
+ "TARGET_AVX512F"
+ "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")]
+ UNSPEC_GETEXP)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_mask_scalar_op3>}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_ALIGN))]
+ "TARGET_AVX512F"
+ "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
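UNSPEC_ALIGN models valignd/valignq: the two sources are concatenated
(operand 1 high, operand 2 low) and shifted right by the immediate,
counted in elements.  A C sketch for valignd with a count in 0..15
(larger counts shift in zeros, which this model omits):

#include <stdint.h>

/* Model of valignd: take 16 consecutive elements starting at `count'
   out of the 32-element concatenation hi:lo.  */
static void
align_d (int32_t dst[16], const int32_t hi[16],
         const int32_t lo[16], int count)
{
  for (int i = 0; i < 16; i++)
    {
      int k = i + count;                  /* index into hi:lo */
      dst[i] = k < 16 ? lo[k] : hi[k - 16];
    }
}
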
+(define_expand "avx512f_shufps512_mask"
+ [(match_operand:V16SF 0 "register_operand")
+ (match_operand:V16SF 1 "register_operand")
+ (match_operand:V16SF 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V16SF 4 "register_operand")
+ (match_operand:HI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 16),
+ GEN_INT (((mask >> 6) & 3) + 16),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 20),
+ GEN_INT (((mask >> 6) & 3) + 20),
+ GEN_INT (((mask >> 0) & 3) + 8),
+ GEN_INT (((mask >> 2) & 3) + 8),
+ GEN_INT (((mask >> 4) & 3) + 24),
+ GEN_INT (((mask >> 6) & 3) + 24),
+ GEN_INT (((mask >> 0) & 3) + 12),
+ GEN_INT (((mask >> 2) & 3) + 12),
+ GEN_INT (((mask >> 4) & 3) + 28),
+ GEN_INT (((mask >> 6) & 3) + 28),
+ operands[4], operands[5]));
+ DONE;
+})
+
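The expander above unrolls a regular decode of the 8-bit shuffle
immediate: each of its four 2-bit fields picks an element within a
128-bit lane, fields 2 and 3 read from the second source (selector
offset 16), and the same fields repeat in all four lanes.  A compact C
model of the sixteen selectors it generates:

/* Model of the imm8 decode in avx512f_shufps512_mask.  */
static void
shufps512_selectors (int sel[16], int mask)
{
  for (int lane = 0; lane < 4; lane++)
    for (int j = 0; j < 4; j++)
      sel[lane * 4 + j] = ((mask >> (2 * j)) & 3)  /* 2-bit field   */
                          + lane * 4               /* lane offset   */
                          + (j >= 2 ? 16 : 0);     /* second source */
}
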
+(define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name5>"
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "register_operand")
+ (match_operand:VF_512 2 "register_operand")
+ (match_operand:<ssefixupmode> 3 "<round_saeonly_expand_predicate5>")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name5> (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]
+ <round_saeonly_expand_operand6>));
+ DONE;
+})
+
+(define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0")
+ (match_operand:VF_512 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0")
+ (match_operand:VF_512 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name5>"
+ [(match_operand:VF_128 0 "register_operand")
+ (match_operand:VF_128 1 "register_operand")
+ (match_operand:VF_128 2 "register_operand")
+ (match_operand:<ssefixupmode> 3 "<round_saeonly_expand_predicate5>")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name5> (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]
+ <round_saeonly_expand_operand6>));
+ DONE;
+})
+
+(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "0")
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "0")
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM)
+ (match_dup 1)
+ (const_int 1))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ UNSPEC_ROUND))]
+ "TARGET_AVX512F"
+ "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
+ [(set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<mask_scalar_codefor>avx512f_rndscale<mode><mask_scalar_name><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_ROUND)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_mask_scalar_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_mask_scalar_op4>, %3}"
+ [(set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
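UNSPEC_ROUND here corresponds to vrndscaleps/vrndscalepd, which rounds
to a fixed number of binary fraction bits given by imm8[7:4].  A rough C
model, assuming the default round-to-nearest-even mode (imm8[3:0]
selects other behaviours, ignored here):

#include <math.h>

/* Approximate model of vrndscale: 2^-M * round(2^M * x).  */
static double
rndscale (double x, int imm)
{
  int m = (imm >> 4) & 0xf;   /* fraction bits to keep */
  return ldexp (nearbyint (ldexp (x, m)), -m);
}
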
+;; Each 2-bit field of the immediate selects one element within a
+;; 128-bit lane; the same four fields are applied to all four lanes.
+(define_insn "avx512f_shufps512_1<mask_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (vec_select:V16SF
+ (vec_concat:V32SF
+ (match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_16_to_19_operand")
+ (match_operand 6 "const_16_to_19_operand")
+ (match_operand 7 "const_4_to_7_operand")
+ (match_operand 8 "const_4_to_7_operand")
+ (match_operand 9 "const_20_to_23_operand")
+ (match_operand 10 "const_20_to_23_operand")
+ (match_operand 11 "const_8_to_11_operand")
+ (match_operand 12 "const_8_to_11_operand")
+ (match_operand 13 "const_24_to_27_operand")
+ (match_operand 14 "const_24_to_27_operand")
+ (match_operand 15 "const_12_to_15_operand")
+ (match_operand 16 "const_12_to_15_operand")
+ (match_operand 17 "const_28_to_31_operand")
+ (match_operand 18 "const_28_to_31_operand")])))]
+ "TARGET_AVX512F
+ && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+ && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
+ && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
+ && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
+ && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
+ && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
+ && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
+ && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
+ && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
+ && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 16) << 4;
+ mask |= (INTVAL (operands[6]) - 16) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
+(define_expand "avx512f_shufpd512_mask"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V8DF 1 "register_operand")
+ (match_operand:V8DF 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V8DF 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 9 : 8),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 11 : 10),
+ GEN_INT (mask & 16 ? 5 : 4),
+ GEN_INT (mask & 32 ? 13 : 12),
+ GEN_INT (mask & 64 ? 7 : 6),
+ GEN_INT (mask & 128 ? 15 : 14),
+ operands[4], operands[5]));
+ DONE;
+})
+
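Here one immediate bit serves per result element: bit i picks the even
or odd member of a pair, and odd result slots come from the second
source (selector offset 8).  A C model of the decode above:

/* Model of the imm8 decode in avx512f_shufpd512_mask.  */
static void
shufpd512_selectors (int sel[8], int mask)
{
  for (int i = 0; i < 8; i++)
    sel[i] = (i & ~1)               /* pair base                */
             + ((mask >> i) & 1)    /* low or high of the pair  */
             + ((i & 1) ? 8 : 0);   /* odd slots use operand 2  */
}
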
+(define_insn "avx512f_shufpd512_1<mask_name>"
+ [(set (match_operand:V8DF 0 "register_operand" "=v")
+ (vec_select:V8DF
+ (vec_concat:V16DF
+ (match_operand:V8DF 1 "register_operand" "v")
+ (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand")
+ (match_operand 4 "const_8_to_9_operand")
+ (match_operand 5 "const_2_to_3_operand")
+ (match_operand 6 "const_10_to_11_operand")
+ (match_operand 7 "const_4_to_5_operand")
+ (match_operand 8 "const_12_to_13_operand")
+ (match_operand 9 "const_6_to_7_operand")
+ (match_operand 10 "const_14_to_15_operand")])))]
+ "TARGET_AVX512F"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 8) << 1;
+ mask |= (INTVAL (operands[5]) - 2) << 2;
+ mask |= (INTVAL (operands[6]) - 10) << 3;
+ mask |= (INTVAL (operands[7]) - 4) << 4;
+ mask |= (INTVAL (operands[8]) - 12) << 5;
+ mask |= (INTVAL (operands[9]) - 6) << 6;
+ mask |= (INTVAL (operands[10]) - 14) << 7;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V8DF")])
+
(define_expand "avx_shufpd256"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V4DF 1 "register_operand")
@@ -4845,6 +7223,22 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (vec_select:V8DI
+ (vec_concat:V16DI
+ (match_operand:V8DI 1 "register_operand" "v")
+ (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX512F"
+ "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "vec_interleave_highv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(vec_select:V2DI
@@ -4879,6 +7273,22 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (vec_select:V8DI
+ (vec_concat:V16DI
+ (match_operand:V8DI 1 "register_operand" "v")
+ (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "TARGET_AVX512F"
+ "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "vec_interleave_lowv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(vec_select:V2DI
@@ -5222,6 +7632,127 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
+;; Parallel integer down-conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
+(define_mode_attr pmov_src_mode
+ [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
+(define_mode_attr pmov_src_lower
+ [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
+(define_mode_attr pmov_suff
+ [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
+
+(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
+ [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
+ (any_truncate:PMOV_DST_MODE
+ (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
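The plain-truncate member of any_truncate simply keeps the low bits of
each element; the ss/us members saturate instead.  An elementwise C
model of the vpmovqw case (V8DI to V8HI; helper name illustrative):

#include <stdint.h>

/* Model of vpmovqw: modular truncation of each 64-bit element.  */
static void
pmov_qw (int16_t out[8], const int64_t in[8])
{
  for (int i = 0; i < 8; i++)
    out[i] = (int16_t) in[i];
}
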
+(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
+ [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE
+ (any_truncate:PMOV_DST_MODE
+ (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
+ (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_<code>v8div16qi2"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512f_<code>v8div16qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512f_<code>v8div16qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 3 "register_operand" "k"))
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512f_<code>v8div16qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "register_operand" "k"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -5234,27 +7765,27 @@
"TARGET_SSE2"
"operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
-(define_expand "<plusminus_insn><mode>3"
+(define_expand "<plusminus_insn><mode>3<mask_name>"
[(set (match_operand:VI_AVX2 0 "register_operand")
(plusminus:VI_AVX2
(match_operand:VI_AVX2 1 "nonimmediate_operand")
(match_operand:VI_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<plusminus_insn><mode>3"
- [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<plusminus_insn><mode>3<mask_name>"
+ [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
(plusminus:VI_AVX2
- (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
- (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
+ (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
@@ -5266,10 +7797,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
- [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(sat_plusminus:VI12_AVX2
- (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
- (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -5298,9 +7829,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
- (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
pmullw\t{%2, %0|%0, %2}
@@ -5325,14 +7856,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*<s>mul<mode>3_highpart"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
(const_int 16))))]
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
@@ -5344,6 +7875,51 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "vec_widen_umult_even_v16si<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand")
+ (mult:V8DI
+ (zero_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (zero_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 2 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "TARGET_AVX512F"
+ "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
+
+(define_insn "*vec_widen_umult_even_v16si<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (mult:V8DI
+ (zero_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand" "v")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (zero_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 2 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
+ "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "isa" "avx512f")
+ (set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
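vpmuludq reads only the even-numbered 32-bit elements and widens their
products to 64 bits, which is exactly what the vec_select/zero_extend
RTL above spells out.  An elementwise C model:

#include <stdint.h>

/* Model of 512-bit vpmuludq: even elements multiplied into full
   64-bit products.  */
static void
widen_umult_even (uint64_t out[8], const uint32_t a[16],
                  const uint32_t b[16])
{
  for (int i = 0; i < 8; i++)
    out[i] = (uint64_t) a[2 * i] * b[2 * i];
}
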
(define_expand "vec_widen_umult_even_v8si"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
@@ -5414,6 +7990,51 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "vec_widen_smult_even_v16si<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand")
+ (mult:V8DI
+ (sign_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (sign_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 2 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "TARGET_AVX512F"
+ "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
+
+(define_insn "*vec_widen_smult_even_v16si<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (mult:V8DI
+ (sign_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 1 "nonimmediate_operand" "v")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (sign_extend:V8DI
+ (vec_select:V8SI
+ (match_operand:V16SI 2 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
+ "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "isa" "avx512f")
+ (set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_expand "vec_widen_smult_even_v8si"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
@@ -5618,12 +8239,12 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_expand "mul<mode>3"
- [(set (match_operand:VI4_AVX2 0 "register_operand")
- (mult:VI4_AVX2
- (match_operand:VI4_AVX2 1 "general_vector_operand")
- (match_operand:VI4_AVX2 2 "general_vector_operand")))]
- "TARGET_SSE2"
+(define_expand "mul<mode>3<mask_name>"
+ [(set (match_operand:VI4_AVX512F 0 "register_operand")
+ (mult:VI4_AVX512F
+ (match_operand:VI4_AVX512F 1 "general_vector_operand")
+ (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_SSE4_1)
{
@@ -5640,26 +8261,27 @@
}
})
-(define_insn "*<sse4_1_avx2>_mul<mode>3"
- [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
- (mult:VI4_AVX2
- (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
+ [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
+ (mult:VI4_AVX512F
+ (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
"@
pmulld\t{%2, %0|%0, %2}
- vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "btver2_decode" "vector,vector")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "mul<mode>3"
- [(set (match_operand:VI8_AVX2 0 "register_operand")
- (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand")
- (match_operand:VI8_AVX2 2 "register_operand")))]
+ [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
+ (mult:VI8_AVX2_AVX512F
+ (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
+ (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
"TARGET_SSE2"
{
ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
@@ -5706,8 +8328,8 @@
(define_expand "vec_widen_<s>mult_odd_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(any_extend:<sseunpackmode>
- (match_operand:VI4_AVX2 1 "general_vector_operand"))
- (match_operand:VI4_AVX2 2 "general_vector_operand")]
+ (match_operand:VI4_AVX512F 1 "general_vector_operand"))
+ (match_operand:VI4_AVX512F 2 "general_vector_operand")]
"TARGET_SSE2"
{
ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
@@ -5764,23 +8386,38 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<shift_insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
- (any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
- "TARGET_SSE2"
+(define_insn "ashr<mode>3<mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
+ (ashiftrt:VI48_512
+ (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512F && <mask_mode512bit_condition>"
+ "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<sseinsnmode>")])
+
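+;; The added avx512f alternative lets the EVEX form shift a memory source
+;; when the count is an immediate; the xmm-count alternatives are unchanged.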
+(define_insn "<shift_insn><mode>3<mask_name>"
+ [(set (match_operand:VI248_AVX512F 0 "register_operand" "=x,v,v")
+ (any_lshift:VI248_AVX512F
+ (match_operand:VI248_AVX512F 1 "register_operand" "0,v,m")
+ (match_operand:SI 2 "nonmemory_operand" "xN,xN,N")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
+ vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "isa" "noavx,avx,avx512f")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
(if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "vec_shl_<mode>"
@@ -5858,25 +8495,45 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
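+;; AVX-512F adds vector rotates on 32- and 64-bit elements, in
+;; variable-count (vprolv/vprorv) and immediate-count (vprol/vpror) forms.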
+(define_insn "avx512f_<rotate>v<mode><mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (any_rotate:VI48_512
+ (match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "<code><mode>3"
- [(set (match_operand:VI124_256 0 "register_operand")
- (maxmin:VI124_256
- (match_operand:VI124_256 1 "nonimmediate_operand")
- (match_operand:VI124_256 2 "nonimmediate_operand")))]
- "TARGET_AVX2"
+(define_insn "avx512f_<rotate><mode><mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (any_rotate:VI48_512
+ (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand")))]
+ "TARGET_AVX512F"
+ "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "<code><mode>3<mask_name><round_name>"
+ [(set (match_operand:VI124_256_48_512 0 "register_operand")
+ (maxmin:VI124_256_48_512
+ (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
+ (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*avx2_<code><mode>3"
- [(set (match_operand:VI124_256 0 "register_operand" "=x")
- (maxmin:VI124_256
- (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
- (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+(define_insn "*avx2_<code><mode>3<mask_name><round_name>"
+ [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
+ (maxmin:VI124_256_48_512
+ (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
+ (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
(define_expand "<code><mode>3"
@@ -6091,6 +8748,28 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand")
+ (match_operand:VI48_512 2 "nonimmediate_operand")]
+ UNSPEC_MASKED_EQ))]
+ "TARGET_AVX512F"
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand" "%v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ UNSPEC_MASKED_EQ))]
+ "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "*sse4_1_eqv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(eq:V2DI
@@ -6165,6 +8844,18 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
+ "TARGET_AVX512F"
+ "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "sse2_gt<mode>3"
[(set (match_operand:VI124_128 0 "register_operand" "=x,x")
(gt:VI124_128
@@ -6180,6 +8871,23 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
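+;; The 512-bit vcond/vcondu expanders defer to ix86_expand_int_vcond, which
+;; handles the compare-and-select sequence for these modes.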
+(define_expand "vcond<V_512:mode><VI_512:mode>"
+ [(set (match_operand:V_512 0 "register_operand")
+ (if_then_else:V_512
+ (match_operator 3 ""
+ [(match_operand:VI_512 4 "nonimmediate_operand")
+ (match_operand:VI_512 5 "general_operand")])
+ (match_operand:V_512 1)
+ (match_operand:V_512 2)))]
+ "TARGET_AVX512F
+ && (GET_MODE_NUNITS (<V_512:MODE>mode)
+ == GET_MODE_NUNITS (<VI_512:MODE>mode))"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcond<V_256:mode><VI_256:mode>"
[(set (match_operand:V_256 0 "register_operand")
(if_then_else:V_256
@@ -6229,6 +8937,23 @@
DONE;
})
+(define_expand "vcondu<V_512:mode><VI_512:mode>"
+ [(set (match_operand:V_512 0 "register_operand")
+ (if_then_else:V_512
+ (match_operator 3 ""
+ [(match_operand:VI_512 4 "nonimmediate_operand")
+ (match_operand:VI_512 5 "nonimmediate_operand")])
+ (match_operand:V_512 1 "general_operand")
+ (match_operand:V_512 2 "general_operand")))]
+ "TARGET_AVX512F
+ && (GET_MODE_NUNITS (<V_512:MODE>mode)
+ == GET_MODE_NUNITS (<VI_512:MODE>mode))"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcondu<V_256:mode><VI_256:mode>"
[(set (match_operand:V_256 0 "register_operand")
(if_then_else:V_256
@@ -6282,7 +9007,9 @@
[V16QI V8HI V4SI V2DI V4SF V2DF
(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
- (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
+ (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -6301,7 +9028,9 @@
(V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
+ (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
(define_expand "vec_perm_const<mode>"
[(match_operand:VEC_PERM_CONST 0 "register_operand")
@@ -6337,26 +9066,32 @@
operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
-(define_expand "<sse2_avx2>_andnot<mode>3"
+(define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
[(set (match_operand:VI_AVX2 0 "register_operand")
(and:VI_AVX2
(not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
(match_operand:VI_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2")
+ "TARGET_SSE2 && <mask_mode512bit_condition>")
-(define_insn "*andnot<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=x,x")
+(define_insn "*andnot<mode>3<mask_name>"
+ [(set (match_operand:VI 0 "register_operand" "=x,v")
(and:VI
- (not:VI (match_operand:VI 1 "register_operand" "0,x"))
- (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE"
+ (not:VI (match_operand:VI 1 "register_operand" "0,v"))
+ (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE && <mask_mode512bit_condition>"
{
- static char buf[32];
+ static char buf[64];
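+ /* The masked EVEX templates need more than the old 32 bytes.  */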
const char *ops;
const char *tmp;
switch (get_attr_mode (insn))
{
+ case MODE_XI:
+ gcc_assert (TARGET_AVX512F);
+
+ tmp = "pandn<ssemodesuffix>";
+ break;
+
case MODE_OI:
gcc_assert (TARGET_AVX2);
case MODE_TI:
@@ -6383,7 +9118,7 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -6400,7 +9135,7 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -6428,20 +9163,26 @@
DONE;
})
-(define_insn "*<code><mode>3"
- [(set (match_operand:VI 0 "register_operand" "=x,x")
+(define_insn "<mask_codefor><code><mode>3<mask_name>"
+ [(set (match_operand:VI 0 "register_operand" "=x,v")
(any_logic:VI
- (match_operand:VI 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE
+ (match_operand:VI 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE && <mask_mode512bit_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
- static char buf[32];
+ static char buf[64];
const char *ops;
const char *tmp;
switch (get_attr_mode (insn))
{
+ case MODE_XI:
+ gcc_assert (TARGET_AVX512F);
+ tmp = "p<logic><ssemodesuffix>";
+ break;
+
case MODE_OI:
gcc_assert (TARGET_AVX2);
case MODE_TI:
@@ -6450,6 +9191,8 @@
tmp = "p<logic>";
break;
+ case MODE_V16SF:
+ gcc_assert (TARGET_AVX512F);
case MODE_V8SF:
gcc_assert (TARGET_AVX);
case MODE_V4SF:
@@ -6468,7 +9211,7 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -6485,7 +9228,7 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -6502,6 +9245,28 @@
]
(const_string "<sseinsnmode>")))])
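+;; vptestm sets mask bit n when the AND of element n of the two sources is
+;; nonzero; vptestnm sets it when that AND is zero.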
+(define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTM))]
+ "TARGET_AVX512F"
+ "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ UNSPEC_TESTNM))]
+ "TARGET_AVX512F"
+ "%vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
@@ -6510,8 +9275,8 @@
(define_expand "vec_pack_trunc_<mode>"
[(match_operand:<ssepackmode> 0 "register_operand")
- (match_operand:VI248_AVX2 1 "register_operand")
- (match_operand:VI248_AVX2 2 "register_operand")]
+ (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
+ (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
"TARGET_SSE2"
{
rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
@@ -6771,6 +9536,27 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (vec_select:V16SI
+ (vec_concat:V32SI
+ (match_operand:V16SI 1 "register_operand" "v")
+ (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX512F"
+ "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "vec_interleave_highv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_select:V4SI
@@ -6805,6 +9591,26 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (vec_select:V16SI
+ (vec_concat:V32SI
+ (match_operand:V16SI 1 "register_operand" "v")
+ (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)])))]
+ "TARGET_AVX512F"
+ "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "vec_interleave_lowv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_select:V4SI
@@ -6923,6 +9729,343 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
+(define_expand "avx512f_vinsert<shuffletype>32x4_mask"
+ [(match_operand:V16FI 0 "register_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:<ssequartermode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:V16FI 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFFF), operands[4],
+ operands[5]));
+ break;
+ case 1:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
+ operands[5]));
+ break;
+ case 2:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
+ operands[5]));
+ break;
+ case 3:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
+ operands[5]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_merge:V16FI
+ (match_operand:V16FI 1 "register_operand" "v")
+ (vec_duplicate:V16FI
+ (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "TARGET_AVX512F"
+{
+ int mask;
+ if (INTVAL (operands[3]) == 0xFFF)
+ mask = 0;
+ else if (INTVAL (operands[3]) == 0xF0FF)
+ mask = 1;
+ else if (INTVAL (operands[3]) == 0xFF0F)
+ mask = 2;
+ else if (INTVAL (operands[3]) == 0xFFF0)
+ mask = 3;
+ else
+ gcc_unreachable ();
+
+ operands[3] = GEN_INT (mask);
+
+ return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
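+;; Inserting a 256-bit half is representable directly as a vec_concat, so
+;; the expander only has to pick the _lo/_hi pattern for the immediate.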
+(define_expand "avx512f_vinsert<shuffletype>64x4_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_1_operand")
+ (match_operand:V8FI 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ if (mask == 0)
+ emit_insn (gen_vec_set_lo_<mode>_mask
+ (operands[0], operands[1], operands[2],
+ operands[4], operands[5]));
+ else
+ emit_insn (gen_vec_set_hi_<mode>_mask
+ (operands[0], operands[1], operands[2],
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:V8FI 0 "register_operand" "=v")
+ (vec_concat:V8FI
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512F"
+ "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:V8FI 0 "register_operand" "=v")
+ (vec_concat:V8FI
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
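+;; vshuf{f,i}64x2 selects 128-bit chunks: each 2-bit immediate field picks
+;; one chunk, fields 0-1 from the first source and fields 2-3 from the
+;; second, which is why the generated selectors for the second source are
+;; offset by 8 (by 16 in the 32x4 variant further down).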
+(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:V8FI 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V8FI 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 3) * 2),
+ GEN_INT (((mask >> 0) & 3) * 2 + 1),
+ GEN_INT (((mask >> 2) & 3) * 2),
+ GEN_INT (((mask >> 2) & 3) * 2 + 1),
+ GEN_INT (((mask >> 4) & 3) * 2 + 8),
+ GEN_INT (((mask >> 4) & 3) * 2 + 9),
+ GEN_INT (((mask >> 6) & 3) * 2 + 8),
+ GEN_INT (((mask >> 6) & 3) * 2 + 9),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
+ [(set (match_operand:V8FI 0 "register_operand" "=v")
+ (vec_select:V8FI
+ (vec_concat:<ssedoublemode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_7_operand")
+ (match_operand 4 "const_0_to_7_operand")
+ (match_operand 5 "const_0_to_7_operand")
+ (match_operand 6 "const_0_to_7_operand")
+ (match_operand 7 "const_8_to_15_operand")
+ (match_operand 8 "const_8_to_15_operand")
+ (match_operand 9 "const_8_to_15_operand")
+ (match_operand 10 "const_8_to_15_operand")])))]
+ "TARGET_AVX512F
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
+ && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+ && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 2;
+ mask |= INTVAL (operands[5]) / 2 << 2;
+ mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
+ mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
+ [(match_operand:V16FI 0 "register_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:V16FI 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V16FI 4 "register_operand")
+ (match_operand:HI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 3) * 4),
+ GEN_INT (((mask >> 0) & 3) * 4 + 1),
+ GEN_INT (((mask >> 0) & 3) * 4 + 2),
+ GEN_INT (((mask >> 0) & 3) * 4 + 3),
+ GEN_INT (((mask >> 2) & 3) * 4),
+ GEN_INT (((mask >> 2) & 3) * 4 + 1),
+ GEN_INT (((mask >> 2) & 3) * 4 + 2),
+ GEN_INT (((mask >> 2) & 3) * 4 + 3),
+ GEN_INT (((mask >> 4) & 3) * 4 + 16),
+ GEN_INT (((mask >> 4) & 3) * 4 + 17),
+ GEN_INT (((mask >> 4) & 3) * 4 + 18),
+ GEN_INT (((mask >> 4) & 3) * 4 + 19),
+ GEN_INT (((mask >> 6) & 3) * 4 + 16),
+ GEN_INT (((mask >> 6) & 3) * 4 + 17),
+ GEN_INT (((mask >> 6) & 3) * 4 + 18),
+ GEN_INT (((mask >> 6) & 3) * 4 + 19),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_select:V16FI
+ (vec_concat:<ssedoublemode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_15_operand")
+ (match_operand 4 "const_0_to_15_operand")
+ (match_operand 5 "const_0_to_15_operand")
+ (match_operand 6 "const_0_to_15_operand")
+ (match_operand 7 "const_0_to_15_operand")
+ (match_operand 8 "const_0_to_15_operand")
+ (match_operand 9 "const_0_to_15_operand")
+ (match_operand 10 "const_0_to_15_operand")
+ (match_operand 11 "const_16_to_31_operand")
+ (match_operand 12 "const_16_to_31_operand")
+ (match_operand 13 "const_16_to_31_operand")
+ (match_operand 14 "const_16_to_31_operand")
+ (match_operand 15 "const_16_to_31_operand")
+ (match_operand 16 "const_16_to_31_operand")
+ (match_operand 17 "const_16_to_31_operand")
+ (match_operand 18 "const_16_to_31_operand")])))]
+ "TARGET_AVX512F
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
+ && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
+ && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+ && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
+ && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
+ && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
+ && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
+ && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
+ && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
+ && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
+ && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 4;
+ mask |= INTVAL (operands[7]) / 4 << 2;
+ mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
+ mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
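+;; The 512-bit vpshufd applies the same 8-bit selector to every 128-bit
+;; lane, so the expander replicates the four 2-bit fields at offsets
+;; +4, +8 and +12.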
+(define_expand "avx512f_pshufdv3_mask"
+ [(match_operand:V16SI 0 "register_operand")
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16SI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ GEN_INT (((mask >> 0) & 3) + 8),
+ GEN_INT (((mask >> 2) & 3) + 8),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 12),
+ GEN_INT (((mask >> 2) & 3) + 12),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_pshufd_1<mask_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (vec_select:V16SI
+ (match_operand:V16SI 1 "nonimmediate_operand" "vm")
+ (parallel [(match_operand 2 "const_0_to_3_operand")
+ (match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_0_to_3_operand")
+ (match_operand 6 "const_4_to_7_operand")
+ (match_operand 7 "const_4_to_7_operand")
+ (match_operand 8 "const_4_to_7_operand")
+ (match_operand 9 "const_4_to_7_operand")
+ (match_operand 10 "const_8_to_11_operand")
+ (match_operand 11 "const_8_to_11_operand")
+ (match_operand 12 "const_8_to_11_operand")
+ (match_operand 13 "const_8_to_11_operand")
+ (match_operand 14 "const_12_to_15_operand")
+ (match_operand 15 "const_12_to_15_operand")
+ (match_operand 16 "const_12_to_15_operand")
+ (match_operand 17 "const_12_to_15_operand")])))]
+ "TARGET_AVX512F
+ && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
+ && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
+ && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
+ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
+ && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
+ && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
+ && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
+ && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
+ && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
+ && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
+ && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
+ && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "evex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "XI")])
+
(define_expand "avx2_pshufdv3"
[(match_operand:V8SI 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")
@@ -7597,25 +10740,25 @@
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX2 1 "register_operand")]
+ (match_operand:VI124_AVX512F 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
(define_expand "vec_unpacks_hi_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX2 1 "register_operand")]
+ (match_operand:VI124_AVX512F 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
(define_expand "vec_unpacku_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX2 1 "register_operand")]
+ (match_operand:VI124_AVX512F 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
(define_expand "vec_unpacku_hi_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI124_AVX2 1 "register_operand")]
+ (match_operand:VI124_AVX512F 1 "register_operand")]
"TARGET_SSE2"
"ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
@@ -8367,12 +11510,12 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "abs<mode>2"
- [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
- (abs:VI124_AVX2
- (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSSE3"
- "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
+(define_insn "abs<mode>2<mask_name>"
+ [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
+ (abs:VI124_AVX2_48_AVX512F
+ (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
+ "TARGET_SSSE3 && <mask_mode512bit_condition>"
+ "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
@@ -8713,6 +11856,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
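+;; Sign/zero extension to 512 bits; <extsuffix> expands to "sx"/"zx"
+;; depending on any_extend, matching vpmovsx*/vpmovzx*.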
+(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (any_extend:V16SI
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx2_<code>v8qiv8si2"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(any_extend:V8SI
@@ -8743,6 +11896,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (any_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx2_<code>v8hiv8si2"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(any_extend:V8SI
@@ -8768,6 +11931,21 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (any_extend:V8DI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512F"
+ "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx2_<code>v4qiv4di2"
[(set (match_operand:V4DI 0 "register_operand" "=x")
(any_extend:V4DI
@@ -8795,6 +11973,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (any_extend:V8DI
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx2_<code>v4hiv4di2"
[(set (match_operand:V4DI 0 "register_operand" "=x")
(any_extend:V4DI
@@ -8822,6 +12010,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx512f_<code>v8siv8di2<mask_name>"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (any_extend:V8DI
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx2_<code>v4siv4di2"
[(set (match_operand:V4DI 0 "register_operand" "=x")
(any_extend:V4DI
@@ -8921,10 +12119,20 @@
DONE;
})
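+;; With a zero scale field (imm[7:4]) vrndscalepd rounds to integers using
+;; the same low immediate bits as vroundpd, so the AVX-style round builtin
+;; can simply forward to the rndscale pattern.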
+(define_expand "avx512f_roundpd512"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V8DF 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_15_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
[(match_operand:<ssepackfltmode> 0 "register_operand")
- (match_operand:VF2 1 "nonimmediate_operand")
- (match_operand:VF2 2 "nonimmediate_operand")
+ (match_operand:VF2_AVX512F 1 "nonimmediate_operand")
+ (match_operand:VF2_AVX512F 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_15_operand")]
"TARGET_ROUND"
{
@@ -8982,11 +12190,11 @@
(define_expand "round<mode>2"
[(set (match_dup 4)
- (plus:VF
- (match_operand:VF 1 "register_operand")
+ (plus:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "register_operand")
(match_dup 3)))
- (set (match_operand:VF 0 "register_operand")
- (unspec:VF
+ (set (match_operand:VF_AVX512F 0 "register_operand")
+ (unspec:VF_AVX512F
[(match_dup 4) (match_dup 5)]
UNSPEC_ROUND))]
"TARGET_ROUND && !flag_trapping_math"
@@ -9030,8 +12238,8 @@
(define_expand "round<mode>2_vec_pack_sfix"
[(match_operand:<ssepackfltmode> 0 "register_operand")
- (match_operand:VF2 1 "register_operand")
- (match_operand:VF2 2 "register_operand")]
+ (match_operand:VF2_AVX512F 1 "register_operand")
+ (match_operand:VF2_AVX512F 2 "register_operand")]
"TARGET_ROUND && !flag_trapping_math"
{
rtx tmp0, tmp1;
@@ -9448,6 +12656,178 @@
(set_attr "btver2_decode" "vector,vector,vector,vector")
(set_attr "mode" "TI")])
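+;; AVX-512PF gather/scatter prefetches.  The address is wrapped in an
+;; UNSPEC_VSIBADDR so it prints as a VSIB memory operand, and the last
+;; operand selects the T0 (0) or T1 (1) prefetch hint.  These patterns only
+;; have a prefetch side effect, so they are bare unspecs rather than sets.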
+(define_expand "avx512pf_gatherpf<mode>"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+ (mem:<ssescalarmode>
+ (match_par_dup 5
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:SI 3 "const1248_operand")]))
+ (match_operand:SI 4 "const_0_to_1_operand")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+ operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_gatherpf<mode>_mask"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+ (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:SI 3 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 4 "const_0_to_1_operand" "n")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[4]))
+ {
+ case 0:
+ return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ case 1:
+ return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_gatherpf<mode>"
+ [(unspec
+ [(const_int -1)
+ (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 1 "vsib_address_operand" "p")
+ (match_operand:VI48_512 0 "register_operand" "v")
+ (match_operand:SI 2 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 3 "const_0_to_1_operand" "n")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
+ case 1:
+ return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_expand "avx512pf_scatterpf<mode>"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+ (mem:<ssescalarmode>
+ (match_par_dup 5
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:SI 3 "const1248_operand")]))
+ (match_operand:SI 4 "const_0_to_1_operand")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+ operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_scatterpf<mode>_mask"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+ (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:VI48_512 1 "register_operand" "v")
+ (match_operand:SI 3 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 4 "const_0_to_1_operand" "n")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[4]))
+ {
+ case 0:
+ return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ case 1:
+ return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_scatterpf<mode>"
+ [(unspec
+ [(const_int -1)
+ (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 1 "vsib_address_operand" "p")
+ (match_operand:VI48_512 0 "register_operand" "v")
+ (match_operand:SI 2 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 3 "const_0_to_1_operand" "n")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
+ case 1:
+ return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
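+;; AVX-512ER approximation instructions.  vrcp28 and vrsqrt28 guarantee a
+;; maximum relative error of 2^-28, which is where the mnemonics get the 28.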
+(define_insn "<mask_codefor>avx512er_exp2<mode><mask_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_EXP2))]
+ "TARGET_AVX512ER"
+ "vexp2<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_RCP28))]
+ "TARGET_AVX512ER"
+ "vrcp28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_RSQRT28))]
+ "TARGET_AVX512ER"
+ "vrsqrt28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; XOP instructions
@@ -9995,6 +13375,13 @@
})
(define_expand "vlshr<mode>3"
+ [(set (match_operand:VI48_512 0 "register_operand")
+ (lshiftrt:VI48_512
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:VI48_512 2 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
+(define_expand "vlshr<mode>3"
[(set (match_operand:VI48_256 0 "register_operand")
(lshiftrt:VI48_256
(match_operand:VI48_256 1 "register_operand")
@@ -10029,6 +13416,12 @@
}
})
+(define_expand "vashrv16si3"
+ [(set (match_operand:V16SI 0 "register_operand")
+ (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
+ (match_operand:V16SI 2 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
(define_expand "vashrv8si3"
[(set (match_operand:V8SI 0 "register_operand")
(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
@@ -10062,6 +13455,13 @@
})
(define_expand "vashl<mode>3"
+ [(set (match_operand:VI48_512 0 "register_operand")
+ (ashift:VI48_512
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:VI48_512 2 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
+(define_expand "vashl<mode>3"
[(set (match_operand:VI48_256 0 "register_operand")
(ashift:VI48_256
(match_operand:VI48_256 1 "register_operand")
@@ -10446,17 +13846,11 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
-(define_mode_attr AVXTOSSEMODE
- [(V4DI "V2DI") (V2DI "V2DI")
- (V8SI "V4SI") (V4SI "V4SI")
- (V16HI "V8HI") (V8HI "V8HI")
- (V32QI "V16QI") (V16QI "V16QI")])
-
(define_insn "avx2_pbroadcast<mode>"
[(set (match_operand:VI 0 "register_operand" "=x")
(vec_duplicate:VI
(vec_select:<ssescalarmode>
- (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
@@ -10480,42 +13874,60 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_permvar<mode>"
- [(set (match_operand:VI4F_256 0 "register_operand" "=x")
- (unspec:VI4F_256
- [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm")
- (match_operand:V8SI 2 "register_operand" "x")]
+(define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
+ [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
+ (unspec:VI48F_256_512
+ [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
+ (match_operand:<sseintvecmode> 2 "register_operand" "v")]
UNSPEC_VPERMVAR))]
- "TARGET_AVX2"
- "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
+ (set_attr "prefix" "<mask_prefix2>")
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx2_perm<mode>"
- [(match_operand:VI8F_256 0 "register_operand")
- (match_operand:VI8F_256 1 "nonimmediate_operand")
+(define_expand "<avx2_avx512f>_perm<mode>"
+ [(match_operand:VI8F_256_512 0 "register_operand")
+ (match_operand:VI8F_256_512 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")]
"TARGET_AVX2"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT ((mask >> 4) & 3),
- GEN_INT ((mask >> 6) & 3)));
+ emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
DONE;
})
-(define_insn "avx2_perm<mode>_1"
- [(set (match_operand:VI8F_256 0 "register_operand" "=x")
- (vec_select:VI8F_256
- (match_operand:VI8F_256 1 "nonimmediate_operand" "xm")
+(define_expand "avx512f_perm<mode>_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8FI 3 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
+ [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
+ (vec_select:VI8F_256_512
+ (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10523,10 +13935,10 @@
mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx2_permv2ti"
@@ -10573,6 +13985,62 @@
(set_attr "isa" "*,avx2,noavx2")
(set_attr "mode" "V8SF")])
+(define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_duplicate:VI48F_512
+ (vec_select:<ssescalarmode>
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v,v")
+ (vec_duplicate:V16FI
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512F"
+ "@
+ vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
+ vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
+ [(set (match_operand:V8FI 0 "register_operand" "=v,v")
+ (vec_duplicate:V8FI
+ (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512F"
+ "@
+ vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
+ vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (vec_duplicate:VI48_512
+ (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
+ "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
+ "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_duplicate:VI48F_512
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx2_vbroadcasti128_<mode>"
[(set (match_operand:VI_256 0 "register_operand" "=x")
(vec_concat:VI_256
@@ -10612,6 +14080,28 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "avx512cd_maskb_vec_dupv8di"
+ [(set (match_operand:V8DI 0 "register_operand" "=v")
+ (vec_duplicate:V8DI
+ (zero_extend:DI
+ (match_operand:QI 1 "register_operand" "k"))))]
+ "TARGET_AVX512CD"
+ "vpbroadcastmb2q\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mskmov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx512cd_maskw_vec_dupv16si"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (vec_duplicate:V16SI
+ (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "k"))))]
+ "TARGET_AVX512CD"
+ "vpbroadcastmw2d\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mskmov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
@@ -10686,86 +14176,168 @@
elt * GET_MODE_SIZE (<ssescalarmode>mode));
})
-(define_expand "avx_vpermil<mode>"
- [(set (match_operand:VF2 0 "register_operand")
- (vec_select:VF2
- (match_operand:VF2 1 "nonimmediate_operand")
+(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
+ [(set (match_operand:VF2_AVX512F 0 "register_operand")
+ (vec_select:VF2_AVX512F
+ (match_operand:VF2_AVX512F 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
- perm[0] = GEN_INT (mask & 1);
- perm[1] = GEN_INT ((mask >> 1) & 1);
- if (<MODE>mode == V4DFmode)
+ int i;
+ for (i = 0; i < <ssescalarnum>; i = i + 2)
{
- perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
- perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
+ perm[i] = GEN_INT (((mask >> i) & 1) + i);
+ perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
}
operands[2]
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
-(define_expand "avx_vpermil<mode>"
- [(set (match_operand:VF1 0 "register_operand")
- (vec_select:VF1
- (match_operand:VF1 1 "nonimmediate_operand")
+(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
+ [(set (match_operand:VF1_AVX512F 0 "register_operand")
+ (vec_select:VF1_AVX512F
+ (match_operand:VF1_AVX512F 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
- perm[0] = GEN_INT (mask & 3);
- perm[1] = GEN_INT ((mask >> 2) & 3);
- perm[2] = GEN_INT ((mask >> 4) & 3);
- perm[3] = GEN_INT ((mask >> 6) & 3);
- if (<MODE>mode == V8SFmode)
+ int i;
+ for (i = 0; i < <ssescalarnum>; i = i + 4)
{
- perm[4] = GEN_INT ((mask & 3) + 4);
- perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
- perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
- perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
+ perm[i] = GEN_INT (((mask >> 0) & 3) + i);
+ perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
+ perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
+ perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
}
operands[2]
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
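+;; The loops above build one 128-bit lane's worth of selectors per
+;; iteration, so a single expander now serves the 128-, 256- and 512-bit
+;; vpermilp modes.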
-(define_insn "*avx_vpermilp<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x")
- (vec_select:VF
- (match_operand:VF 1 "nonimmediate_operand" "xm")
+(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+ (vec_select:VF_AVX512F
+ (match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")
(match_parallel 2 ""
[(match_operand 3 "const_int_operand")])))]
- "TARGET_AVX
+ "TARGET_AVX && <mask_mode512bit_condition>
&& avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
operands[2] = GEN_INT (mask);
- return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "<MODE>")])
+ (set_attr "prefix" "<mask_prefix>")
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx_vpermilvar<mode>3"
- [(set (match_operand:VF 0 "register_operand" "=x")
- (unspec:VF
- [(match_operand:VF 1 "register_operand" "x")
- (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
+(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
+ [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+ (unspec:VF_AVX512F
+ [(match_operand:VF_AVX512F 1 "register_operand" "v")
+ (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
UNSPEC_VPERMIL))]
- "TARGET_AVX"
- "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
(set_attr "btver2_decode" "vector")
- (set_attr "mode" "<MODE>")])
+ (set_attr "prefix" "<mask_prefix>")
+ (set_attr "mode" "<sseinsnmode>")])
+
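+;; vpermi2* overwrites the index operand (hence the "0" tie on operand 2)
+;; and vpermt2* overwrites one table operand; the _maskz expanders pass a
+;; zero vector as the merge source.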
+(define_expand "avx512f_vpermi2var<mode>3_maskz"
+ [(match_operand:VI48F_512 0 "register_operand" "=v")
+ (match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMI2))]
+ "TARGET_AVX512F"
+ "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_vpermi2var<mode>3_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_merge:VI48F_512
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMI2_MASK)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_vpermt2var<mode>3_maskz"
+ [(match_operand:VI48F_512 0 "register_operand" "=v")
+ (match_operand:<sseintvecmode> 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (unspec:VI48F_512
+ [(match_operand:<sseintvecmode> 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMT2))]
+ "TARGET_AVX512F"
+ "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_vpermt2var<mode>3_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_merge:VI48F_512
+ (unspec:VI48F_512
+ [(match_operand:<sseintvecmode> 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMT2)
+ (match_dup 2)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
(define_expand "avx_vperm2f128<mode>3"
[(set (match_operand:AVX256MODE2P 0 "register_operand")
@@ -11099,6 +14671,15 @@
DONE;
})
+(define_expand "vec_init<mode>"
+ [(match_operand:VI48F_512 0 "register_operand")
+ (match_operand 1)]
+ "TARGET_AVX512F"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
(define_expand "avx2_extracti128"
[(match_operand:V2DI 0 "nonimmediate_operand")
(match_operand:V4DI 1 "register_operand")
@@ -11148,31 +14729,36 @@
DONE;
})
-(define_insn "avx2_ashrv<mode>"
- [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
- (ashiftrt:VI4_AVX2
- (match_operand:VI4_AVX2 1 "register_operand" "x")
- (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+(define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
+ (ashiftrt:VI48_AVX512F
+ (match_operand:VI48_AVX512F 1 "register_operand" "v")
+ (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_<shift_insn>v<mode>"
- [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
- (any_lshift:VI48_AVX2
- (match_operand:VI48_AVX2 1 "register_operand" "x")
- (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+(define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
+ [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
+ (any_lshift:VI48_AVX2_48_AVX512F
+ (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
+ (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
+;; For avx_vec_concat<mode> insn pattern
+(define_mode_attr concat_tg_mode
+ [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
+ (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
+
(define_insn "avx_vec_concat<mode>"
- [(set (match_operand:V_256 0 "register_operand" "=x,x")
- (vec_concat:V_256
+ [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
+ (vec_concat:V_256_512
(match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
(match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
"TARGET_AVX"
@@ -11180,16 +14766,24 @@
switch (which_alternative)
{
case 0:
- return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
+ return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
case 1:
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
+ return "vmovaps\t{%1, %t0|%t0, %1}";
+ case MODE_V8DF:
+ return "vmovapd\t{%1, %t0|%t0, %1}";
case MODE_V8SF:
return "vmovaps\t{%1, %x0|%x0, %1}";
case MODE_V4DF:
return "vmovapd\t{%1, %x0|%x0, %1}";
- default:
+ case MODE_XI:
+ return "vmovdqa\t{%1, %t0|%t0, %1}";
+ case MODE_OI:
return "vmovdqa\t{%1, %x0|%x0, %1}";
+ default:
+ gcc_unreachable ();
}
default:
gcc_unreachable ();
@@ -11198,7 +14792,7 @@
[(set_attr "type" "sselog,ssemov")
(set_attr "prefix_extra" "1,*")
(set_attr "length_immediate" "1,*")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "vcvtph2ps"
@@ -11235,6 +14829,17 @@
(set_attr "btver2_decode" "double")
(set_attr "mode" "V8SF")])
+(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
+ [(set (match_operand:V16SF 0 "register_operand" "=v")
+ (unspec:V16SF
+ [(match_operand:V16HI 1 "nonimmediate_operand" "<round_saeonly_constraint>")]
+ UNSPEC_VCVTPH2PS))]
+ "TARGET_AVX512F"
+ "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
(define_expand "vcvtps2ph"
[(set (match_operand:V8HI 0 "register_operand")
(vec_concat:V8HI
@@ -11281,24 +14886,39 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V8SF")])
+(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
+ [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
+ (unspec:V16HI
+ [(match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_AVX512F"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
+
(define_mode_attr VEC_GATHER_IDXSI
- [(V2DI "V4SI") (V2DF "V4SI")
- (V4DI "V4SI") (V4DF "V4SI")
- (V4SI "V4SI") (V4SF "V4SI")
- (V8SI "V8SI") (V8SF "V8SI")])
+ [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
+ (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
+ (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
+ (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
+
(define_mode_attr VEC_GATHER_IDXDI
- [(V2DI "V2DI") (V2DF "V2DI")
- (V4DI "V4DI") (V4DF "V4DI")
- (V4SI "V2DI") (V4SF "V2DI")
- (V8SI "V4DI") (V8SF "V4DI")])
+ [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
+ (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
+ (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
+ (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
+
(define_mode_attr VEC_GATHER_SRCDI
- [(V2DI "V2DI") (V2DF "V2DF")
- (V4DI "V4DI") (V4DF "V4DF")
- (V4SI "V4SI") (V4SF "V4SF")
- (V8SI "V4SI") (V8SF "V4SF")])
+ [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
+ (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
+ (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
+ (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
(define_expand "avx2_gathersi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
@@ -11472,3 +15092,373 @@
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_gathersi<mode>"
+ [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")
+ (mem:<ssescalarmode>
+ (match_par_dup 6
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
+ (match_operand:SI 5 "const1248_operand")]))]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:<avx512fmaskmode> 7))])]
+ "TARGET_AVX512F"
+{
+ operands[6]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
+ operands[5]), UNSPEC_VSIBADDR);
+})
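+
+;; A clarifying sketch: the expander above folds base, index and scale
+;; into a single UNSPEC_VSIBADDR so that the insns below can match the
+;; VSIB memory form.  With hypothetical operands, the V16SF variant
+;; emits something like
+;;   vgatherdps (%rax,%zmm1,4), %zmm0{%k2}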
+
+(define_insn "*avx512f_gathersi<mode>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "0")
+ (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 4 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_gathersi<mode>_2"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+ (unspec:VI48F_512
+ [(pc)
+ (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+ (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 3 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
+ (match_operand:SI 4 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_gatherdi<mode>"
+ [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
+ (unspec:VI48F_512
+ [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
+ (match_operand:QI 4 "register_operand")
+ (mem:<ssescalarmode>
+ (match_par_dup 6
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
+ (match_operand:SI 5 "const1248_operand")]))]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:QI 7))])]
+ "TARGET_AVX512F"
+{
+ operands[6]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
+ operands[5]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_gatherdi<mode>"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+ (unspec:VI48F_512
+ [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
+ (match_operand:QI 7 "register_operand" "2")
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 4 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:QI 2 "=&k"))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_gatherdi<mode>_2"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+ (unspec:VI48F_512
+ [(pc)
+ (match_operand:QI 6 "register_operand" "1")
+ (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 3 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
+ (match_operand:SI 4 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:QI 1 "=&k"))]
+ "TARGET_AVX512F"
+{
+ if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_scattersi<mode>"
+ [(parallel [(set (mem:VI48F_512
+ (match_par_dup 5
+ [(match_operand 0 "vsib_address_operand")
+ (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
+ (match_operand:SI 4 "const1248_operand")]))
+ (unspec:VI48F_512
+ [(match_operand:<avx512fmaskmode> 1 "register_operand")
+ (match_operand:VI48F_512 3 "register_operand")]
+ UNSPEC_SCATTER))
+ (clobber (match_scratch:<avx512fmaskmode> 6))])]
+ "TARGET_AVX512F"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
+ operands[4]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_scattersi<mode>"
+ [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 0 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
+ (match_operand:SI 4 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (unspec:VI48F_512
+ [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+ (match_operand:VI48F_512 3 "register_operand" "v")]
+ UNSPEC_SCATTER))
+ (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_scatterdi<mode>"
+ [(parallel [(set (mem:VI48F_512
+ (match_par_dup 5
+ [(match_operand 0 "vsib_address_operand")
+ (match_operand:V8DI 2 "register_operand")
+ (match_operand:SI 4 "const1248_operand")]))
+ (unspec:VI48F_512
+ [(match_operand:QI 1 "register_operand")
+ (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
+ UNSPEC_SCATTER))
+ (clobber (match_scratch:QI 6))])]
+ "TARGET_AVX512F"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
+ operands[4]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_scatterdi<mode>"
+ [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 0 "vsib_address_operand" "p")
+ (match_operand:V8DI 2 "register_operand" "v")
+ (match_operand:SI 4 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (unspec:VI48F_512
+ [(match_operand:QI 6 "register_operand" "1")
+ (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
+ UNSPEC_SCATTER))
+ (clobber (match_scratch:QI 1 "=&k"))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_compress<mode>_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
+ UNSPEC_COMPRESS))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_compressstore<mode>_mask"
+ [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
+ (unspec:VI48F_512
+      [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
+ UNSPEC_COMPRESS_STORE))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_expand<mode>_maskz"
+ [(set (match_operand:VI48F_512 0 "register_operand")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "nonimmediate_operand")
+ (match_operand:VI48F_512 2 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand")]
+ UNSPEC_EXPAND))]
+ "TARGET_AVX512F"
+ "operands[2] = CONST0_RTX (<MODE>mode);")
+
+(define_insn "avx512f_expand<mode>_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
+ UNSPEC_EXPAND))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 2 "const_0_to_15_operand")]
+ UNSPEC_GETMANT))]
+ "TARGET_AVX512F"
+ "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_getmant<mode><mask_scalar_name><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_15_operand")]
+ UNSPEC_GETMANT)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_mask_scalar_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_mask_scalar_op4>, %3}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "clz<mode>2<mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (clz:VI48_512
+ (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512CD"
+ "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>conflict<mode><mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
+ UNSPEC_CONFLICT))]
+ "TARGET_AVX512CD"
+ "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "sha1msg1"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_SHA1MSG1))]
+ "TARGET_SHA"
+ "sha1msg1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha1msg2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_SHA1MSG2))]
+ "TARGET_SHA"
+ "sha1msg2\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha1nexte"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_SHA1NEXTE))]
+ "TARGET_SHA"
+ "sha1nexte\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha1rnds4"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_3_operand" "n")]
+ UNSPEC_SHA1RNDS4))]
+ "TARGET_SHA"
+ "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha256msg1"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_SHA256MSG1))]
+ "TARGET_SHA"
+ "sha256msg1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha256msg2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_SHA256MSG2))]
+ "TARGET_SHA"
+ "sha256msg2\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sha256rnds2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI
+ [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 3 "register_operand" "Yz")]
+ UNSPEC_SHA256RNDS2))]
+ "TARGET_SHA"
+ "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
new file mode 100644
index 00000000000..fcc5e8ce5a2
--- /dev/null
+++ b/gcc/config/i386/subst.md
@@ -0,0 +1,222 @@
+;; GCC machine description for AVX512F instructions
+;; Copyright (C) 2013 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Iterators used to extend the substs below to as many modes as possible.
+;; All vector modes (use for destinations).
+(define_mode_iterator SUBST_V
+ [V16QI
+ V16HI V8HI
+ V16SI V8SI V4SI
+ V8DI V4DI V2DI
+ V16SF V8SF V4SF
+ V8DF V4DF V2DF])
+
+(define_mode_iterator SUBST_S
+ [QI HI SI DI])
+
+(define_mode_iterator SUBST_A
+ [V16QI
+ V16HI V8HI
+ V16SI V8SI V4SI
+ V8DI V4DI V2DI
+ V16SF V8SF V4SF
+ V8DF V4DF V2DF
+ QI HI SI DI SF DF
+ CCFP CCFPU])
+
+(define_subst_attr "mask_name" "mask" "" "_mask")
+(define_subst_attr "mask_applied" "mask" "false" "true")
+(define_subst_attr "mask_operand2" "mask" "" "%{%3%}%N2")
+(define_subst_attr "mask_operand3" "mask" "" "%{%4%}%N3")
+(define_subst_attr "mask_operand3_1" "mask" "" "%%{%%4%%}%%N3") ;; for sprintf
+(define_subst_attr "mask_operand4" "mask" "" "%{%5%}%N4")
+(define_subst_attr "mask_operand6" "mask" "" "%{%7%}%N6")
+(define_subst_attr "mask_operand11" "mask" "" "%{%12%}%N11")
+(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
+(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
+(define_subst_attr "mask_codefor" "mask" "*" "")
+(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)")
+(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
+(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
+(define_subst_attr "mask_prefix" "mask" "vex" "evex")
+(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
+(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+
+(define_subst "mask"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))])
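+
+;; Illustrative expansion (hypothetical pattern name "foo"; operand
+;; numbers assume the unmasked insn has only operands 0 and 1):
+;; applying "mask" to "foo<mask_name>" yields, besides plain "foo",
+;; a "foo_mask" variant of the form
+;;   (set (match_dup 0)
+;;        (vec_merge:V16SI (match_dup 1)
+;;          (match_operand:V16SI 2 "vector_move_operand" "0C")
+;;          (match_operand:HI 3 "register_operand" "k")))
+;; giving merge-masking when operand 2 is the old destination and
+;; zero-masking when it is const0.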
+
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
+(define_subst_attr "mask_scalar_operand4" "mask_scalar" "" "%{%5%}%N4")
+(define_subst_attr "mask_scalar_codefor" "mask_scalar" "*" "")
+(define_subst_attr "mask_scalar_prefix" "mask_scalar" "orig,vex" "evex")
+(define_subst_attr "mask_scalar_prefix2" "mask_scalar" "vex" "evex")
+
+(define_subst "mask_scalar"
+ [(set (match_operand:SUBST_V 0)
+ (vec_merge:SUBST_V
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 4 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k"))
+ (match_dup 2)
+ (const_int 1)))])
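+
+;; For scalar insns the mask applies only to element 0: the inner
+;; vec_merge does the masking, while the outer vec_merge with
+;; (const_int 1) keeps the upper elements of operand 2 unchanged.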
+
+(define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask")
+(define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}")
+(define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}")
+
+(define_subst "mask_scalar_merge"
+ [(set (match_operand:SUBST_S 0)
+ (match_operand:SUBST_S 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (and:SUBST_S
+ (match_dup 1)
+ (match_operand:SUBST_S 3 "register_operand" "k")))])
+
+(define_subst_attr "sd_maskz_name" "sd" "" "_maskz_1")
+(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4")
+(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
+(define_subst_attr "sd_mask_codefor" "sd" "*" "")
+(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)")
+
+(define_subst "sd"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "const0_operand" "C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))
+])
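+
+;; The "sd" subst builds the "_maskz_1" zero-masking bodies used by the
+;; avx512f_*_maskz expanders in sse.md: the const0 merge operand forces
+;; masked-off destination elements to zero.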
+
+(define_subst_attr "round_name" "round" "" "_round")
+(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
+(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
+(define_subst_attr "round_op2" "round" "" "%R2")
+(define_subst_attr "round_op3" "round" "" "%R3")
+(define_subst_attr "round_op4" "round" "" "%R4")
+(define_subst_attr "round_op5" "round" "" "%R5")
+(define_subst_attr "round_op6" "round" "" "%R6")
+(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>")
+(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
+(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>")
+(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
+(define_subst_attr "round_constraint" "round" "vm" "v")
+(define_subst_attr "round_constraint2" "round" "m" "v")
+(define_subst_attr "round_constraint3" "round" "rm" "r")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)")
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
+(define_subst_attr "round_codefor" "round" "*" "")
+(define_subst_attr "round_opnum" "round" "5" "6")
+
+(define_subst "round"
+ [(set (match_operand:SUBST_A 0)
+ (match_operand:SUBST_A 1))]
+ "TARGET_AVX512F"
+ [(parallel[
+ (set (match_dup 0)
+ (match_dup 1))
+ (unspec [(match_operand:SI 2 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)])])
+
+(define_subst_attr "round_saeonly_name" "round_saeonly" "" "_round")
+(define_subst_attr "round_saeonly_mask_operand2" "mask" "%R2" "%R4")
+(define_subst_attr "round_saeonly_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_saeonly_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_saeonly_mask_scalar_operand4" "mask_scalar" "%R4" "%R6")
+(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%R4" "%R5")
+(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%R5" "%R7")
+(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%R2")
+(define_subst_attr "round_saeonly_op4" "round_saeonly" "" "%R4")
+(define_subst_attr "round_saeonly_op5" "round_saeonly" "" "%R5")
+(define_subst_attr "round_saeonly_op6" "round_saeonly" "" "%R6")
+(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>")
+(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
+(define_subst_attr "round_saeonly_mask_scalar_op3" "round_saeonly" "" "<round_saeonly_mask_scalar_operand3>")
+(define_subst_attr "round_saeonly_mask_scalar_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_operand4>")
+(define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>")
+(define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>")
+(define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v")
+(define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v")
+(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)")
+(define_subst_attr "round_saeonly_mode512bit_condition_op1" "round_saeonly" "1" "(GET_MODE (operands[1]) == V16SFmode || GET_MODE (operands[1]) == V8DFmode)")
+
+(define_subst "round_saeonly"
+ [(set (match_operand:SUBST_A 0)
+ (match_operand:SUBST_A 1))]
+ "TARGET_AVX512F"
+ [(parallel[
+ (set (match_dup 0)
+ (match_dup 1))
+ (unspec [(match_operand:SI 2 "const_4_to_5_operand")] UNSPEC_EMBEDDED_ROUNDING)])])
+
+(define_subst_attr "round_expand_name" "round_expand" "" "_round")
+(define_subst_attr "round_expand_predicate" "round_expand" "nonimmediate_operand" "register_operand")
+(define_subst_attr "round_expand_operand" "round_expand" "" ", operands[5]")
+
+(define_subst "round_expand"
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SUBST_V 3)
+ (match_operand:SUBST_S 4)]
+ "TARGET_AVX512F"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (unspec [(match_operand:SI 5 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)])
+
+(define_subst_attr "round_saeonly_expand_name5" "round_saeonly_expand5" "" "_round")
+(define_subst_attr "round_saeonly_expand_predicate5" "round_saeonly_expand5" "nonimmediate_operand" "register_operand")
+(define_subst_attr "round_saeonly_expand_operand6" "round_saeonly_expand5" "" ", operands[6]")
+
+(define_subst "round_saeonly_expand5"
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SUBST_A 3)
+ (match_operand:SI 4)
+ (match_operand:SUBST_S 5)]
+ "TARGET_AVX512F"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (unspec [(match_operand:SI 6 "const_4_to_5_operand")] UNSPEC_EMBEDDED_ROUNDING)])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 445780955a9..2adeb0c3906 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -644,7 +644,8 @@ Objective-C and Objective-C++ Dialects}.
-mrecip -mrecip=@var{opt} @gol
-mvzeroupper -mprefer-avx128 @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
--mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
+-mavx2 -mavx512f -mavx512pf -mavx512er -mavx512cd -msha @gol
+-maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
-mbmi2 -mrtm -mlwp -mthreads @gol
-mno-align-stringops -minline-all-stringops @gol
@@ -14309,11 +14310,22 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-avx
@itemx -mavx2
@itemx -mno-avx2
+@itemx -mavx512f
+@itemx -mno-avx512f
+@need 800
+@itemx -mavx512pf
+@itemx -mno-avx512pf
+@itemx -mavx512er
+@itemx -mno-avx512er
+@itemx -mavx512cd
+@itemx -mno-avx512cd
+@itemx -msha
+@itemx -mno-sha
@itemx -maes
@itemx -mno-aes
@itemx -mpclmul
-@need 800
@itemx -mno-pclmul
+@need 800
@itemx -mfsgsbase
@itemx -mno-fsgsbase
@itemx -mrdrnd
@@ -14325,8 +14337,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -msse4a
@itemx -mno-sse4a
@itemx -mfma4
-@need 800
@itemx -mno-fma4
+@need 800
@itemx -mxop
@itemx -mno-xop
@itemx -mlwp
@@ -14353,8 +14365,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow
@opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX, SSE,
-SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
-FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM or 3DNow!@:
+SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, AVX512ER,
+AVX512CD, SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP,
+LWP, ABM, BMI, BMI2, LZCNT, RTM or 3DNow!@:
extended instruction sets.
These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and
diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index f14e11f6166..1d6222382b8 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -1097,6 +1097,10 @@ this is the right mode to use for certain pointers.
@item OImode
``Octa Integer'' (?) mode represents a thirty-two-byte integer.
+@findex XImode
+@item XImode
+``Hexadeca Integer'' (?) mode represents a sixty-four-byte integer.
+
@findex QFmode
@item QFmode
``Quarter-Floating'' mode represents a quarter-precision (single byte)
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 7496746aec8..bffb96cce36 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -166,6 +166,202 @@
/* rtmintrin.h */
#define __builtin_ia32_xabort(I) __builtin_ia32_xabort(0)
+/* avx512fintrin.h */
+#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addsd_mask(A, B, C, D, E) __builtin_ia32_addsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_addss_mask(A, B, C, D, E) __builtin_ia32_addss_mask(A, B, C, D, 1)
+#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpsd_mask(A, B, F, D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) __builtin_ia32_cvtpd2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1)
+#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
+#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
+#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
+#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5)
+#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 1)
+#define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 1)
+#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
+#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divsd_mask(A, B, C, D, E) __builtin_ia32_divsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_divss_mask(A, B, C, D, E) __builtin_ia32_divss_mask(A, B, C, D, 1)
+#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpsd128_mask(A, B, C, D, E) __builtin_ia32_getexpsd128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getexpss128_mask(A, B, C, D, E) __builtin_ia32_getexpss128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantsd_mask(A, B, I, D, E, F) __builtin_ia32_getmantsd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_getmantss_mask(A, B, I, D, E, F) __builtin_ia32_getmantss_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxsd_mask(A, B, C, D, E) __builtin_ia32_maxsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxss_mask(A, B, C, D, E) __builtin_ia32_maxss_mask(A, B, C, D, 5)
+#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minsd_mask(A, B, C, D, E) __builtin_ia32_minsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_minss_mask(A, B, C, D, E) __builtin_ia32_minss_mask(A, B, C, D, 5)
+#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulsd_mask(A, B, C, D, E) __builtin_ia32_mulsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulss_mask(A, B, C, D, E) __builtin_ia32_mulss_mask(A, B, C, D, 1)
+#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
+#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
+#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
+#define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D)
+#define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D)
+#define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefsd_mask(A, B, C, D, E) __builtin_ia32_scalefsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefss_mask(A, B, C, D, E) __builtin_ia32_scalefss_mask(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
+#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 1)
+#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subsd_mask(A, B, C, D, E) __builtin_ia32_subsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_subss_mask(A, B, C, D, E) __builtin_ia32_subss_mask(A, B, C, D, 1)
+#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
+#define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 5)
+#define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 5)
+#define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 1)
+#define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 1)
+#define __builtin_ia32_vcvtsd2usi32(A, B) __builtin_ia32_vcvtsd2usi32(A, 1)
+#define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 1)
+#define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 1)
+#define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 1)
+#define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 1)
+#define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 1)
+#define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 5)
+#define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 5)
+#define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 5)
+#define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 5)
+#define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 5)
+#define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 5)
+#define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 5)
+#define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 5)
+#define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 1)
+#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 1)
+#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 1)
+#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 1)
+#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 1)
+#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 1)
+#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+
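+/* The defines above pin every immediate argument of these builtins to a
+   literal constant, so the intrinsic headers included below compile even
+   when an immediate parameter is not a literal at that point.  A hedged
+   illustration (variables a, b and k are hypothetical): given the
+   sha1rnds4 define above, a use such as
+     __m128i r = _mm_sha1rnds4_epu32 (a, b, k);
+   still reaches the builtin with the literal 1 as its immediate.  */
+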
#include <wmmintrin.h>
#include <immintrin.h>
#include <mm3dnow.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-check.h b/gcc/testsuite/gcc.target/i386/avx512cd-check.h
new file mode 100644
index 00000000000..bccf8b48e06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-check.h
@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512cd_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512cd_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ()) && ((ebx & (bit_AVX512CD)) == (bit_AVX512CD)))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-1.c
new file mode 100644
index 00000000000..036031b7659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastmb2q\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%zmm\[0-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m8;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastmb_epi64 (m8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-2.c
new file mode 100644
index 00000000000..5a6bb379106
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmb2q-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+
+static void
+CALC (long long *res, __mmask8 src)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ res[i] = src;
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res;
+ long long res_ref[SIZE];
+  __mmask8 src = 0xF5; /* arbitrary test mask */
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res.a[i] = -1;
+ }
+
+ res.x = INTRINSIC (_broadcastmb_epi64) (src);
+
+ CALC (res_ref, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-1.c
new file mode 100644
index 00000000000..36abb5e7bc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastmw2d\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%zmm\[0-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m16;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastmw_epi32 (m16);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-2.c
new file mode 100644
index 00000000000..a830d9ac630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpbroadcastmw2d-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+
+static void
+CALC (int *res, __mmask16 src)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ res[i] = src;
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res;
+ int res_ref[SIZE];
+  __mmask16 src = 0xF5A9; /* arbitrary test mask */
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res.a[i] = -1;
+ }
+
+ res.x = INTRINSIC (_broadcastmw_epi32) (src);
+
+ CALC (res_ref, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-1.c
new file mode 100644
index 00000000000..d3f2a258dbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512i res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_conflict_epi32 (s);
+ res = _mm512_mask_conflict_epi32 (res, 2, s);
+ res = _mm512_maskz_conflict_epi32 (2, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-2.c
new file mode 100644
index 00000000000..b267c415f0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictd-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
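+/* Reference model for VPCONFLICTD: bit j of result element i is set iff
+   elements i and j compare equal, for j < i.  */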
+static void
+CALC (int *s, int *r)
+{
+ int i, j;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 0;
+ for (j = 0; j < i; j++)
+ {
+ r[i] |= s[j] == s[i] ? 1 << j : 0;
+ }
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s, res1, res2, res3;
+ int res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 1234 * (i % 5);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_conflict_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_conflict_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_conflict_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-1.c
new file mode 100644
index 00000000000..795fa6add48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512i res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_conflict_epi64 (s);
+ res = _mm512_mask_conflict_epi64 (res, 2, s);
+ res = _mm512_maskz_conflict_epi64 (2, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-2.c
new file mode 100644
index 00000000000..8930a38d979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vpconflictq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s, long long *r)
+{
+ int i, j;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 0;
+ for (j = 0; j < i; j++)
+ {
+ r[i] |= s[i] == s[j] ? 1 << j : 0;
+ }
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s, res1, res2, res3;
+ long long res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345678 * (i % 5);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_conflict_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_conflict_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_conflict_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-1.c
new file mode 100644
index 00000000000..65a2a32751a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512i res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_lzcnt_epi32 (s);
+ res = _mm512_mask_lzcnt_epi32 (res, 2, s);
+ res = _mm512_maskz_lzcnt_epi32 (2, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-2.c
new file mode 100644
index 00000000000..feccdced4a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntd-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include <strings.h>
+
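+/* Reference leading-zero count: scan down from bit 31 until a set bit
+   is found, so an all-zero element yields 32, matching VPLZCNTD.  */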
+static void
+CALC (int *s, int *r)
+{
+ int i, res;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res = 0;
+ while ((res < 32) && (((s[i] >> (31 - res)) & 1) == 0))
+ ++res;
+ r[i] = res;
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s, res1, res2, res3;
+ int res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345678 * (i % 5);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_lzcnt_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_lzcnt_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_lzcnt_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-1.c
new file mode 100644
index 00000000000..0324cd0c2be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512i res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_lzcnt_epi64 (s);
+ res = _mm512_maskz_lzcnt_epi64 (2, s);
+ res = _mm512_mask_lzcnt_epi64 (res, 2, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-2.c b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-2.c
new file mode 100644
index 00000000000..4aa652b15c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512cd-vplzcntq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512cd -DHAVE_512 -DAVX512CD" } */
+/* { dg-require-effective-target avx512cd } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <strings.h>
+
+static void
+CALC (long long *s, long long *r)
+{
+ int i, res;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res = 0;
+ while ((res < 64) && (((s[i] >> (63 - res)) & 1) == 0))
+ ++res;
+ r[i] = res;
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s, res1, res2, res3;
+ long long res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345678 * (i % 5);
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_lzcnt_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_lzcnt_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_lzcnt_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-check.h b/gcc/testsuite/gcc.target/i386/avx512er-check.h
new file mode 100644
index 00000000000..e820677268b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-check.h
@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512er_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512er_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
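+  /* Run AVX512ER test only if host has AVX512ER support.  */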
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ()) && ((ebx & (bit_AVX512ER)) == (bit_AVX512ER)))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c
new file mode 100644
index 00000000000..9fb87cfb8ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_exp2a23_pd (x);
+ x = _mm512_mask_exp2a23_pd (x, m, x);
+ x = _mm512_maskz_exp2a23_pd (m, x);
+ x = _mm512_exp2a23_round_pd (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_exp2a23_round_pd (x, m, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_exp2a23_round_pd (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c
new file mode 100644
index 00000000000..ce4e86c1f95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+void static
+compute_vexp2pd (double *s, double *r)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ r[i] = pow (2.0, s[i]);
+}
+
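+/* The a23 suffix advertises roughly 2^-23 relative accuracy, hence the
+   loose 1e-4 tolerance against the libm reference below.  */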
+void static
+avx512er_test (void)
+{
+ union512d src, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[8];
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_exp2a23_pd (src.x);
+ res2.x = _mm512_mask_exp2a23_pd (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_exp2a23_pd (mask, src.x);
+
+ compute_vexp2pd (src.a, res_ref);
+
+ if (check_rough_union512d (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c
new file mode 100644
index 00000000000..a7e7009ec01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_exp2a23_ps (x);
+ x = _mm512_mask_exp2a23_ps (x, m, x);
+ x = _mm512_maskz_exp2a23_ps (m, x);
+ x = _mm512_exp2a23_round_ps (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_exp2a23_round_ps (x, m, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_exp2a23_round_ps (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c
new file mode 100644
index 00000000000..06ef68c3d2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+void static
+compute_vexp2ps (float *s, float *r)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ r[i] = pow (2.0, s[i]);
+}
+
+void static
+avx512er_test (void)
+{
+ union512 src, res1, res2, res3;
+ __mmask16 mask = MASK_VALUE;
+ float res_ref[16];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_exp2a23_ps (src.x);
+ res2.x = _mm512_mask_exp2a23_ps (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_exp2a23_ps (mask, src.x);
+
+ compute_vexp2ps (src.a, res_ref);
+
+ if (check_rough_union512 (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 16);
+ if (check_rough_union512 (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 16);
+ if (check_rough_union512 (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c
new file mode 100644
index 00000000000..06b61609f14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_rcp28_pd (x);
+ x = _mm512_mask_rcp28_pd (x, m, x);
+ x = _mm512_maskz_rcp28_pd (m, x);
+ x = _mm512_rcp28_round_pd (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_rcp28_round_pd (x, m, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_rcp28_round_pd (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c
new file mode 100644
index 00000000000..609aeaa31c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+void static
+compute_vrcp28pd (double *s, double *r)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ r[i] = 1.0 / s[i];
+}
+
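+/* VRCP28PD refines the reciprocal to about 2^-28 relative error, so a
+   1e-4 tolerance against exact division is more than enough.  */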
+void static
+avx512er_test (void)
+{
+ union512d src, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[8];
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_rcp28_pd (src.x);
+ res2.x = _mm512_mask_rcp28_pd (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_rcp28_pd (mask, src.x);
+
+ compute_vrcp28pd (src.a, res_ref);
+
+ if (check_rough_union512d (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c
new file mode 100644
index 00000000000..023d6b2f519
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_rcp28_ps (x);
+ x = _mm512_mask_rcp28_ps (x, m, x);
+ x = _mm512_maskz_rcp28_ps (m, x);
+ x = _mm512_rcp28_round_ps (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_rcp28_round_ps (x, m, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_rcp28_round_ps (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c
new file mode 100644
index 00000000000..4059e0e7f52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+void static
+compute_vrcp28ps (float *s, float *r)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ r[i] = 1.0 / s[i];
+}
+
+void static
+avx512er_test (void)
+{
+ union512 src, res1, res2, res3;
+ __mmask16 mask = MASK_VALUE;
+ float res_ref[16];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_rcp28_ps (src.x);
+ res2.x = _mm512_mask_rcp28_ps (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_rcp28_ps (mask, src.x);
+
+ compute_vrcp28ps (src.a, res_ref);
+
+ if (check_rough_union512 (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 16);
+ if (check_rough_union512 (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 16);
+ if (check_rough_union512 (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c
new file mode 100644
index 00000000000..dfb95b2bf30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_rsqrt28_pd (x);
+ x = _mm512_mask_rsqrt28_pd (x, m, x);
+ x = _mm512_maskz_rsqrt28_pd (m, x);
+ x = _mm512_rsqrt28_round_pd (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_rsqrt28_round_pd (x, m, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_rsqrt28_round_pd (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c
new file mode 100644
index 00000000000..84a66addd55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+void static
+compute_vrsqrt28pd (double *s, double *r)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ r[i] = 1.0 / sqrt (s[i]);
+}
+
+void static
+avx512er_test (void)
+{
+ union512d src, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[8];
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_rsqrt28_pd (src.x);
+ res2.x = _mm512_mask_rsqrt28_pd (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_rsqrt28_pd (mask, src.x);
+
+ compute_vrsqrt28pd (src.a, res_ref);
+
+ if (check_rough_union512d (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 8);
+ if (check_rough_union512d (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c
new file mode 100644
index 00000000000..ecd3a6fbf12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512er -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512er_test (void)
+{
+ x = _mm512_rsqrt28_ps (x);
+ x = _mm512_mask_rsqrt28_ps (x, m, x);
+ x = _mm512_maskz_rsqrt28_ps (m, x);
+ x = _mm512_rsqrt28_round_ps (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_rsqrt28_round_ps (x, m, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_rsqrt28_round_ps (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c
new file mode 100644
index 00000000000..a92472e6191
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512er } */
+/* { dg-options "-O2 -mavx512er" } */
+
+#include "avx512er-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
+void static
+compute_vrsqrt28ps (float *s, float *r)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ r[i] = 1.0 / sqrt (s[i]);
+}
+
+void static
+avx512er_test (void)
+{
+ union512 src, res1, res2, res3;
+ __mmask16 mask = MASK_VALUE;
+ float res_ref[16];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 179.345 - 6.5645 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_rsqrt28_ps (src.x);
+ res2.x = _mm512_mask_rsqrt28_ps (res2.x, mask, src.x);
+ res3.x = _mm512_maskz_rsqrt28_ps (mask, src.x);
+
+ compute_vrsqrt28ps (src.a, res_ref);
+
+ if (check_rough_union512 (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 16);
+ if (check_rough_union512 (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 16);
+ if (check_rough_union512 (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-1.c b/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-1.c
new file mode 100644
index 00000000000..f550e22471b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]+%zmm\[0-9\]\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]+%zmm\[0-9\]\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]+%zmm\[0-9\]\[^\{\]" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]+%zmm\[0-9\]\[^\{\]" 1 { target ia32 } } } */
+
+#include <x86intrin.h>
+
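+/* Initializing all vector elements from one scalar should be emitted
+   as a single vpbroadcast from the general-purpose register, which is
+   what the scans above verify.  */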
+__m512i
+foo_1 (long long y)
+{
+ return __extension__ (__m512i)(__v8di){ y, y, y, y, y, y, y, y };
+}
+
+__m512i
+foo_2 (int y)
+{
+ return __extension__ (__m512i)(__v16si){ y, y, y, y, y, y, y, y, y,
+ y, y, y, y, y, y, y };
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-2.c b/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-2.c
new file mode 100644
index 00000000000..91665b299ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-broadcast-gpr-2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-broadcast-gpr-1.c"
+
+void
+avx512f_test (void)
+{
+ union512i_q q;
+ union512i_d d;
+ int i;
+
+ q.x = foo_1 (3);
+ d.x = foo_2 (5);
+
+ for (i = 0; i < 8; i++)
+ {
+ if (q.a[i] != 3)
+ abort ();
+ }
+
+ for (i = 0; i < 16; i++)
+ {
+ if (d.a[i] != 5)
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-1.c
new file mode 100644
index 00000000000..038d25e3582
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include <math.h>
+#include "avx512f-check.h"
+
+extern double ceil (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+ double a[NUM];
+ int r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = (int) ceil (a[i]);
+
+  /* Check results.  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) ceil (a[i]))
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-2.c
new file mode 100644
index 00000000000..8dafb1bf815
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceil-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-ceil-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler "vrndscalepd\[^\n\]*zmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq\[^\n\]*zmm\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-check.h b/gcc/testsuite/gcc.target/i386/avx512f-check.h
new file mode 100644
index 00000000000..9e01367205c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-check.h
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512f_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512f_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run AVX512F test only if host has AVX512F support. */
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ()) && ((ebx & bit_AVX512F) == bit_AVX512F))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-dummy.c b/gcc/testsuite/gcc.target/i386/avx512f-dummy.c
new file mode 100644
index 00000000000..84b062789b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-dummy.c
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union512i_q u, s1, s2;
+ long long e[8];
+ volatile int tst = check_union512i_q (u, e);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-1.c
new file mode 100644
index 00000000000..fab7e6528ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-1.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include <math.h>
+#include "avx512f-check.h"
+
+extern double floor (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+ double a[NUM];
+ int r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = (int) floor (a[i]);
+
+  /* Check results.  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) floor (a[i]))
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-2.c
new file mode 100644
index 00000000000..90e625abcd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floor-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-floor-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler "vrndscalepd\[^\n\]*zmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq\[^\n\]*zmm\[0-9\]" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c
new file mode 100644
index 00000000000..5ccb03a1f49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c
@@ -0,0 +1,217 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+float vf1[N+16], vf2[N];
+double vd1[N+16], vd2[N];
+int vi1[N+16], vi2[N], k[N];
+long long vl1[N+16], vl2[N];
+long l[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[l[i] + x];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+
+ for (i = 0; i < N + 16; i++)
+ {
+ asm ("");
+ vf1[i] = 17.0f + i;
+ vd1[i] = 19.0 + i;
+ vi1[i] = 21 + i;
+ vl1[i] = 23L + i;
+ }
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ k[i] = (i * 731) & (N - 1);
+ l[i] = (i * 657) & (N - 1);
+ }
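+  /* Multiplying by an odd constant is a bijection modulo a power of
+     two, so the products above, masked with N - 1, permute 0..N-1 and
+     every gathered index stays in bounds.  */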
+
+ f1 ();
+ f2 ();
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 731) & (N - 1)) + 17
+ || vi2[i] != ((i * 731) & (N - 1)) + 21)
+ abort ();
+
+ f3 (12);
+ f4 (14);
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 731) & (N - 1)) + 17 + 12
+ || vi2[i] != ((i * 731) & (N - 1)) + 21 + 14)
+ abort ();
+
+ f5 ();
+ f6 ();
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 731) & (N - 1)) + 19
+ || vl2[i] != ((i * 731) & (N - 1)) + 23)
+ abort ();
+
+ f7 (6);
+ f8 (3);
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 731) & (N - 1)) + 19 + 6
+ || vl2[i] != ((i * 731) & (N - 1)) + 23 + 3)
+ abort ();
+
+ f9 ();
+ f10 ();
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 657) & (N - 1)) + 17
+ || vi2[i] != ((i * 657) & (N - 1)) + 21)
+ abort ();
+
+ f11 (7);
+ f12 (9);
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 657) & (N - 1)) + 17 + 7
+ || vi2[i] != ((i * 657) & (N - 1)) + 21 + 9)
+ abort ();
+
+ f13 ();
+ f14 ();
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 657) & (N - 1)) + 19
+ || vl2[i] != ((i * 657) & (N - 1)) + 23)
+ abort ();
+
+ f15 (2);
+ f16 (12);
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 657) & (N - 1)) + 19 + 2
+ || vl2[i] != ((i * 657) & (N - 1)) + 23 + 12)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
new file mode 100644
index 00000000000..f5443119049
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f -fdump-tree-vect-details" } */
+
+#include "avx512f-gather-1.c"
+
+/* { dg-final { scan-assembler-not "gather\[^\n\]*ymm\[^\n\]*ymm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*xmm\[^\n\]*ymm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*ymm\[^\n\]*xmm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*xmm\[^\n\]*xmm" } } */
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 16 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c
new file mode 100644
index 00000000000..5e20dd8898a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c
@@ -0,0 +1,169 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f -ffast-math" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+float f[N];
+double d[N];
+int k[N];
+float *l[N];
+double *n[N];
+int **m[N];
+long q[N];
+long long **o[N];
+long long t[N];
+long long *r[N];
+int *s[N];
+
+__attribute__((noinline, noclone)) float
+f1 (void)
+{
+ int i;
+ float g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += f[k[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) float
+f2 (float *p)
+{
+ int i;
+ float g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += p[k[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) float
+f3 (void)
+{
+ int i;
+ float g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += *l[i];
+ return g;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (void)
+{
+ int i;
+ int g = 0;
+ for (i = 0; i < N / 2; i++)
+ g += **m[i];
+ return g;
+}
+
+__attribute__((noinline, noclone)) double
+f5 (void)
+{
+ int i;
+ double g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += d[k[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) double
+f6 (double *p)
+{
+ int i;
+ double g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += p[k[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) double
+f7 (void)
+{
+ int i;
+ double g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += *n[i];
+ return g;
+}
+
+__attribute__((noinline, noclone)) int
+f8 (void)
+{
+ int i;
+ int g = 0;
+ for (i = 0; i < N / 2; i++)
+ g += **o[i];
+ return g;
+}
+
+__attribute__((noinline, noclone)) float
+f9 (void)
+{
+ int i;
+ float g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += f[q[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) float
+f10 (float *p)
+{
+ int i;
+ float g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += p[q[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) double
+f11 (void)
+{
+ int i;
+ double g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += d[q[i]];
+ return g;
+}
+
+__attribute__((noinline, noclone)) double
+f12 (double *p)
+{
+ int i;
+ double g = 0.0;
+ for (i = 0; i < N / 2; i++)
+ g += p[q[i]];
+ return g;
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ f[i] = -256.0f + i;
+ d[i] = -258.0 + i;
+ k[i] = (i * 731) & (N - 1);
+ q[i] = (i * 657) & (N - 1);
+ t[i] = (i * 657) & (N - 1);
+ l[i] = &f[(i * 239) & (N - 1)];
+ n[i] = &d[(i * 271) & (N - 1)];
+ r[i] = &t[(i * 323) & (N - 1)];
+ s[i] = &k[(i * 565) & (N - 1)];
+ m[i] = &s[(i * 13) & (N - 1)];
+ o[i] = &r[(i * 19) & (N - 1)];
+ }
+
+ if (f1 () != 136448.0f || f2 (f) != 136448.0f || f3 () != 130304.0)
+ abort ();
+ if (f4 () != 261376 || f5 () != 135424.0 || f6 (d) != 135424.0)
+ abort ();
+ if (f7 () != 129280.0 || f8 () != 259840L || f9 () != 130816.0f)
+ abort ();
+ if (f10 (f) != 130816.0f || f11 () != 129792.0 || f12 (d) != 129792.0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c
new file mode 100644
index 00000000000..bea8c24b8cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+int a[N], b[N], c[N], d[N];
+
+__attribute__((noinline, noclone)) void
+foo (float *__restrict p, float *__restrict q, float *__restrict r,
+ int s1, int s2, int s3)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = q[a[i] * s1 + b[i] * s2 + s3] * r[c[i] * s1 + d[i] * s2 + s3];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ float e[N], f[N], g[N];
+ for (i = 0; i < N; i++)
+ {
+ a[i] = (i * 7) & (N / 8 - 1);
+ b[i] = (i * 13) & (N / 8 - 1);
+ c[i] = (i * 23) & (N / 8 - 1);
+ d[i] = (i * 5) & (N / 8 - 1);
+ e[i] = 16.5 + i;
+ f[i] = 127.5 - i;
+ }
+ foo (g, e, f, 3, 2, 4);
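+  /* With e[j] = 16.5 + j and f[j] = 127.5 - j, the two gathered
+     operands reduce to the closed forms checked here.  */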
+ for (i = 0; i < N; i++)
+ if (g[i] != (float) ((20.5 + a[i] * 3 + b[i] * 2)
+ * (123.5 - c[i] * 3 - d[i] * 2)))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
new file mode 100644
index 00000000000..d2237da1566
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#include "avx512f-gather-4.c"
+
+/* { dg-final { scan-assembler "gather\[^\n\]*zmm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*ymm\[^\n\]*ymm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*xmm\[^\n\]*ymm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*ymm\[^\n\]*xmm" } } */
+/* { dg-final { scan-assembler-not "gather\[^\n\]*xmm\[^\n\]*xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
new file mode 100644
index 00000000000..61b2e90d197
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
@@ -0,0 +1,96 @@
+/* This file is used to reduce the number of runtime test files for
+   AVX512F instructions.  The idea is to create one file per
+   instruction - avx512f-insn-2.c - that uses defines from this file
+   instead of hard-coding the intrinsic name, vector length etc.  The
+   dg-options of that .c file then add the appropriate -D defines,
+   producing a test for the specific length.  */
+
+#if defined (AVX512F)
+#include "avx512f-check.h"
+#elif defined (AVX512ER)
+#include "avx512er-check.h"
+#elif defined (AVX512CD)
+#include "avx512cd-check.h"
+#endif
+
+/* Macros expansion. */
+#define CONCAT(a,b,c) a ## b ## c
+#define EVAL(a,b,c) CONCAT(a,b,c)
+
+/* Value to be written into destination.
+ We have one value for all types so it must be small enough
+ to fit into signed char. */
+#define DEFAULT_VALUE 117
+
+#define MAKE_MASK_MERGE(NAME, TYPE) \
+static void \
+__attribute__((noinline, unused)) \
+merge_masking_##NAME (TYPE *arr, unsigned long long mask, int size) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) \
+ { \
+ arr[i] = (mask & (1LL << i)) ? arr[i] : DEFAULT_VALUE; \
+ } \
+}
+
+MAKE_MASK_MERGE(i_b, char)
+MAKE_MASK_MERGE(i_w, short)
+MAKE_MASK_MERGE(i_d, int)
+MAKE_MASK_MERGE(i_q, long long)
+MAKE_MASK_MERGE(, float)
+MAKE_MASK_MERGE(d, double)
+
+#define MASK_MERGE(TYPE) merge_masking_##TYPE
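+/* For instance, MASK_MERGE (i_d) (res_ref, mask, SIZE) resets every
+   element of res_ref whose mask bit is clear to DEFAULT_VALUE,
+   mirroring the merge-masking behaviour of the tested intrinsics.  */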
+
+#define MAKE_MASK_ZERO(NAME, TYPE) \
+static void \
+__attribute__((noinline, unused)) \
+zero_masking_##NAME (TYPE *arr, unsigned long long mask, int size) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) \
+ { \
+ arr[i] = (mask & (1LL << i)) ? arr[i] : 0; \
+ } \
+}
+
+MAKE_MASK_ZERO(i_b, char)
+MAKE_MASK_ZERO(i_w, short)
+MAKE_MASK_ZERO(i_d, int)
+MAKE_MASK_ZERO(i_q, long long)
+MAKE_MASK_ZERO(, float)
+MAKE_MASK_ZERO(d, double)
+
+#define MASK_ZERO(TYPE) zero_masking_##TYPE
+
+/* Intrinsic being tested. */
+#define INTRINSIC(NAME) EVAL(_mm, AVX512F_LEN, NAME)
+/* Unions used for testing (for example union512d, union256d etc.). */
+#define UNION_TYPE(SIZE, NAME) EVAL(union, SIZE, NAME)
+/* Corresponding union check. */
+#define UNION_CHECK(SIZE, NAME) EVAL(check_union, SIZE, NAME)
+/* Corresponding fp union check. */
+#define UNION_FP_CHECK(SIZE, NAME) EVAL(check_fp_union, SIZE, NAME)
+/* Corresponding rough union check. */
+#define UNION_ROUGH_CHECK(SIZE, NAME) \
+ EVAL(check_rough_union, SIZE, NAME)
+/* Function which tests intrinsic for given length. */
+#define TEST EVAL(test_, AVX512F_LEN,)
+/* Function which calculates result. */
+#define CALC EVAL(calc_, AVX512F_LEN,)
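+/* For example, with AVX512F_LEN defined to 512, INTRINSIC (_lzcnt_epi32)
+   expands to _mm512_lzcnt_epi32 and UNION_TYPE (AVX512F_LEN, i_d) to
+   union512i_d.  */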
+
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+static void test_512 (void);
+
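+/* Define the entry point expected by the corresponding *-check.h
+   header and forward it to the instantiated test_512.  */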
+#if defined (AVX512F)
+void
+avx512f_test (void) { test_512 (); }
+#elif defined (AVX512CD)
+void
+avx512cd_test (void) { test_512 (); }
+#elif defined (AVX512ER)
+void
+avx512er_test (void) { test_512 (); }
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c
new file mode 100644
index 00000000000..7a0ee9978fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherdd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, idx;
+volatile __mmask16 m16;
+int *base;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_i32gather_epi32 (idx, base, 8);
+ x = _mm512_mask_i32gather_epi32 (x, m16, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c
new file mode 100644
index 00000000000..d89ef048d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherdd (int *res, __mmask16 m16, int *idx,
+ int *src, int scale, int *r)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if (m16 & (1 << i))
+ r[i] = *(int *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512i_d idx, res;
+ int src[16];
+ int res_ref[16];
+ __mmask16 m16 = 0xBC5D;
+
+ for (i = 0; i < 16; i++)
+ {
+ src[i] = 1973 * (i + 1) * (i + 2);
+
+ /* About to gather in reverse order,
+ divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 4) >> 1;
+ }
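+
+  /* With SCALE 2 the byte offset becomes idx * 2 = 64 - (i + 1) * 4,
+     so element i reads src[15 - i], i.e. the source in reverse.  */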
+
+ res.x = _mm512_mask_i32gather_epi32 (res.x, m16, idx.x, src, SCALE);
+ compute_gatherdd (res.a, m16, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_d (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i32gather_epi32 (idx.x, src, SCALE);
+ compute_gatherdd (res.a, 0xFFFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c
new file mode 100644
index 00000000000..88b9ae62455
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgatherdpd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *base;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_i32gather_pd (idx, base, 8);
+ x = _mm512_mask_i32gather_pd (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c
new file mode 100644
index 00000000000..3af491548ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherdpd (double *res, __mmask8 m8, int *idx,
+ double *src, int scale, double *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(double *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512d res;
+ union256i_d idx;
+ double src[8];
+ double res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+ res.x = _mm512_setzero_pd();
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+ /* About to gather in reverse order,
+ divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ res.x = _mm512_mask_i32gather_pd (res.x, m8, idx.x, src, SCALE);
+ compute_gatherdpd (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i32gather_pd (idx.x, src, SCALE);
+ compute_gatherdpd (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c
new file mode 100644
index 00000000000..6abc2301d57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgatherdps\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m512i idx;
+volatile __mmask16 m16;
+float *base;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_i32gather_ps (idx, base, 8);
+ x = _mm512_mask_i32gather_ps (x, m16, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-2.c
new file mode 100644
index 00000000000..691413ab2ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherdps (float *res, __mmask16 m16, int *idx,
+ float *src, int scale, float *r)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if (m16 & (1 << i))
+ r[i] = *(float *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512 res;
+ union512i_d idx;
+ float src[16];
+ float res_ref[16];
+ __mmask16 m16 = 0xBC5D;
+
+ res.x = _mm512_setzero_ps();
+
+ for (i = 0; i < 16; i++)
+ {
+ src[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+ /* About to gather in reverse order,
+ divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 4) >> 1;
+ }
+
+ res.x = _mm512_mask_i32gather_ps (res.x, m16, idx.x, src, SCALE);
+ compute_gatherdps (res.a, m16, idx.a, src, SCALE, res_ref);
+
+ if (check_union512 (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i32gather_ps (idx.x, src, SCALE);
+ compute_gatherdps (res.a, 0xFFFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512 (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-1.c
new file mode 100644
index 00000000000..ee4491eb1db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherdq\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i idx;
+volatile __mmask8 m8;
+long long *base;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_i32gather_epi64 (idx, base, 8);
+ x = _mm512_mask_i32gather_epi64 (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-2.c
new file mode 100644
index 00000000000..4d472faa2ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherq512-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherdq (long long *res, __mmask8 m8, int *idx,
+ long long *src, int scale, long long *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(long long *)
+ (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256i_d idx;
+ union512i_q res;
+ long long src[8];
+ long long res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 1983 * (i + 1) * (i + 2);
+
+ /* About to gather in reverse order,
+ divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ res.x = _mm512_mask_i32gather_epi64 (res.x, m8, idx.x, src, SCALE);
+ compute_gatherdq (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i32gather_epi64 (idx.x, src, SCALE);
+ compute_gatherdq (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-1.c
new file mode 100644
index 00000000000..7a5c311661e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i src, idx;
+volatile __mmask16 m16;
+int *addr;
+
+void extern
+avx512f_test (void)
+{
+ _mm512_i32scatter_epi32 (addr, idx, src, 8);
+ _mm512_mask_i32scatter_epi32 (addr, m16, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-2.c
new file mode 100644
index 00000000000..569690021ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterd512-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterdd (__mmask16 m16, int *idx,
+ int *src, int scale, int *r)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if (m16 & (1 << i))
+ *(int *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512i_d src, idx;
+ int res[16] = { 0 };
+ int res_ref[16] = { 0 };
+ __mmask16 m16 = 0xBC5D;
+
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 1973 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order,
+         divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 4) >> 1;
+ }
+
+ _mm512_mask_i32scatter_epi32 (res, m16, idx.x, src.x, SCALE);
+ compute_scatterdd (m16, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVi (res, res_ref, 16))
+ abort ();
+
+ _mm512_i32scatter_epi32 (res, idx.x, src.x, SCALE);
+ compute_scatterdd (0xFFFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVi (res, res_ref, 16))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-1.c
new file mode 100644
index 00000000000..6c5ddc0a9c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+ _mm512_i32scatter_pd (addr, idx, src, 8);
+ _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-2.c
new file mode 100644
index 00000000000..987b3f437f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterpd512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterdpd (__mmask8 m8, int *idx, double *src,
+ int scale, double *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(double *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512d src;
+ union256i_d idx;
+ double res[8] = { 0.0 };
+ double res_ref[8] = { 0.0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order,
+         divide by 2 to demonstrate scale */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ _mm512_mask_i32scatter_pd (res, m8, idx.x, src.x, SCALE);
+ compute_scatterdpd (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVd (res, res_ref, 8))
+ abort ();
+
+ _mm512_i32scatter_pd (res, idx.x, src.x, SCALE);
+ compute_scatterdpd (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVd (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-1.c
new file mode 100644
index 00000000000..c24344a28d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512 src;
+volatile __m512i idx;
+volatile __mmask16 m16;
+float *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i32scatter_ps (addr, idx, src, 8);
+ _mm512_mask_i32scatter_ps (addr, m16, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-2.c
new file mode 100644
index 00000000000..8604c8d5c1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterps512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterdps (__mmask16 m16, int *idx,
+ float *src, int scale, float *r)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if (m16 & (1 << i))
+ *(float *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512 src;
+ union512i_d idx;
+ float res[16] = { 0.0 };
+ float res_ref[16] = { 0.0 };
+ __mmask16 m16 = 0xBC5D;
+
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 4) >> 1;
+ }
+
+ _mm512_mask_i32scatter_ps (res, m16, idx.x, src.x, SCALE);
+ compute_scatterdps (m16, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVf (res, res_ref, 16))
+ abort ();
+
+ _mm512_i32scatter_ps (res, idx.x, src.x, SCALE);
+ compute_scatterdps (0xFFFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVf (res, res_ref, 16))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-1.c
new file mode 100644
index 00000000000..5b28175465a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+long long *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i32scatter_epi64 (addr, idx, src, 8);
+ _mm512_mask_i32scatter_epi64 (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-2.c
new file mode 100644
index 00000000000..fe5c3ade1a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i32scatterq512-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterdq (__mmask8 m8, int *idx, long long *src,
+ int scale, long long *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(long long *) (((unsigned char *) r) + idx[i] * scale) =
+ src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256i_d idx;
+ union512i_q src;
+ long long res[8] = { 0 };
+ long long res_ref[8] = { 0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 1983 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ _mm512_mask_i32scatter_epi64 (res, m8, idx.x, src.x, SCALE);
+ compute_scatterdq (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVl (res, res_ref, 8))
+ abort ();
+
+ _mm512_i32scatter_epi64 (res, idx.x, src.x, SCALE);
+ compute_scatterdq (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVl (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-1.c
new file mode 100644
index 00000000000..66dcf6f60c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherqd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+extern void
+avx512f_test (void)
+{
+ x = _mm512_i64gather_epi32 (idx, base, 8);
+ x = _mm512_mask_i64gather_epi32 (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-2.c
new file mode 100644
index 00000000000..dff818db4ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherd512-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
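+/* Scalar reference for vpgatherqd: lanes whose mask bit is clear keep the previous destination value (merge masking).  */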
+static void
+compute_gatherqd (int *res, __mmask8 m8, long long *idx,
+ int *src, int scale, int *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(int *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256i_d res;
+ union512i_q idx;
+ int src[8];
+ int res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 1973 * (i + 1) * (i + 2);
+
+      /* About to gather in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (32 - (i + 1) * 4) >> 1;
+ }
+
+ res.x = _mm512_mask_i64gather_epi32 (res.x, m8, idx.x, src, SCALE);
+ compute_gatherqd (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union256i_d (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i64gather_epi32 (idx.x, src, SCALE);
+ compute_gatherqd (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union256i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-1.c
new file mode 100644
index 00000000000..4a3df890497
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgatherqpd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *base;
+
+extern void
+avx512f_test (void)
+{
+ x = _mm512_i64gather_pd (idx, base, 8);
+ x = _mm512_mask_i64gather_pd (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-2.c
new file mode 100644
index 00000000000..7cb6d82eb00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherpd512-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherqpd (double *res, __mmask8 m8, long long *idx,
+ double *src, int scale, double *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(double *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512d res;
+ union512i_q idx;
+ double src[8];
+ double res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+  res.x = _mm512_setzero_pd ();
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to gather in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ res.x = _mm512_mask_i64gather_pd (res.x, m8, idx.x, src, SCALE);
+ compute_gatherqpd (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i64gather_pd (idx.x, src, SCALE);
+ compute_gatherqpd (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-1.c
new file mode 100644
index 00000000000..4caee0569ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgatherqps\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m512i idx;
+volatile __mmask8 m8;
+float *base;
+
+extern void
+avx512f_test (void)
+{
+ x = _mm512_i64gather_ps (idx, base, 8);
+ x = _mm512_mask_i64gather_ps (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-2.c
new file mode 100644
index 00000000000..8ed0fcef409
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherps512-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherqps (float *res, __mmask8 m8, long long *idx,
+ float *src, int scale, float *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(float *) (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256 res;
+ union512i_q idx;
+ float src[8];
+ float res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+  res.x = _mm256_setzero_ps ();
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to gather in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (32 - (i + 1) * 4) >> 1;
+ }
+
+ res.x = _mm512_mask_i64gather_ps (res.x, m8, idx.x, src, SCALE);
+ compute_gatherqps (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union256 (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i64gather_ps (idx.x, src, SCALE);
+ compute_gatherqps (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union256 (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-1.c
new file mode 100644
index 00000000000..20d39e74849
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherqq\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, idx;
+volatile __mmask8 m8;
+long long *base;
+
+extern void
+avx512f_test (void)
+{
+ x = _mm512_i64gather_epi64 (idx, base, 8);
+ x = _mm512_mask_i64gather_epi64 (x, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-2.c
new file mode 100644
index 00000000000..134fd18b82d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64gatherq512-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_gatherqq (long long *res, __mmask8 m8, long long *idx,
+ long long *src, int scale, long long *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ r[i] = *(long long *)
+ (((unsigned char *) src) + idx[i] * scale);
+ else
+ r[i] = res[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512i_q idx, res;
+ long long src[8];
+ long long res_ref[8];
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src[i] = 1983 * (i + 1) * (i + 2);
+
+      /* About to gather in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ res.x = _mm512_mask_i64gather_epi64 (res.x, m8, idx.x, src, SCALE);
+ compute_gatherqq (res.a, m8, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+
+ res.x = _mm512_i64gather_epi64 (idx.x, src, SCALE);
+ compute_gatherqq (res.a, 0xFF, idx.a, src, SCALE, res_ref);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-1.c
new file mode 100644
index 00000000000..a2f5275d67e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i64scatter_epi32 (addr, idx, src, 8);
+ _mm512_mask_i64scatter_epi32 (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-2.c
new file mode 100644
index 00000000000..877ef906205
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterd512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterqd (__mmask8 m8, long long *idx,
+ int *src, int scale, int *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(int *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256i_d src;
+ union512i_q idx;
+ int res[8] = { 0 };
+ int res_ref[8] = { 0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 1973 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (32 - (i + 1) * 4) >> 1;
+ }
+
+ _mm512_mask_i64scatter_epi32 (res, m8, idx.x, src.x, SCALE);
+ compute_scatterqd (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVi (res, res_ref, 8))
+ abort ();
+
+ _mm512_i64scatter_epi32 (res, idx.x, src.x, SCALE);
+ compute_scatterqd (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVi (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-1.c
new file mode 100644
index 00000000000..288a2183b0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i64scatter_pd (addr, idx, src, 8);
+ _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-2.c
new file mode 100644
index 00000000000..2ded7bc7628
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterpd512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterqpd (__mmask8 m8, long long *idx, double *src,
+ int scale, double *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(double *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512d src;
+ union512i_q idx;
+ double res[8] = { 0.0 };
+ double res_ref[8] = { 0.0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ _mm512_mask_i64scatter_pd (res, m8, idx.x, src.x, SCALE);
+ compute_scatterqpd (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVd (res, res_ref, 8))
+ abort ();
+
+ _mm512_i64scatter_pd (res, idx.x, src.x, SCALE);
+ compute_scatterqpd (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVd (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-1.c
new file mode 100644
index 00000000000..6a0b05d7997
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256 src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+float *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i64scatter_ps (addr, idx, src, 8);
+ _mm512_mask_i64scatter_ps (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-2.c
new file mode 100644
index 00000000000..4a74d4667ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterps512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterqps (__mmask8 m8, long long *idx,
+ float *src, int scale, float *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(float *) (((unsigned char *) r) + idx[i] * scale) = src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union256 src;
+ union512i_q idx;
+ float res[8] = { 0.0 };
+ float res_ref[8] = { 0.0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 2.718281828459045 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (32 - (i + 1) * 4) >> 1;
+ }
+
+ _mm512_mask_i64scatter_ps (res, m8, idx.x, src.x, SCALE);
+ compute_scatterqps (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVf (res, res_ref, 8))
+ abort ();
+
+ _mm512_i64scatter_ps (res, idx.x, src.x, SCALE);
+ compute_scatterqps (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVf (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-1.c
new file mode 100644
index 00000000000..10a7a4be6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i src, idx;
+volatile __mmask8 m8;
+long long *addr;
+
+extern void
+avx512f_test (void)
+{
+ _mm512_i64scatter_epi64 (addr, idx, src, 8);
+ _mm512_mask_i64scatter_epi64 (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-2.c
new file mode 100644
index 00000000000..975973f34f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-i64scatterq512-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define SCALE 2
+
+static void
+compute_scatterqq (__mmask8 m8, long long *idx, long long *src,
+ int scale, long long *r)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (m8 & (1 << i))
+ *(long long *) (((unsigned char *) r) + idx[i] * scale) =
+ src[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union512i_q src, idx;
+ long long res[8] = { 0 };
+ long long res_ref[8] = { 0 };
+ __mmask8 m8 = 0xC5;
+
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 1983 * (i + 1) * (i + 2);
+
+      /* About to scatter in reverse order;
+	 indices divided by 2 to demonstrate scale.  */
+ idx.a[i] = (64 - (i + 1) * 8) >> 1;
+ }
+
+ _mm512_mask_i64scatter_epi64 (res, m8, idx.x, src.x, SCALE);
+ compute_scatterqq (m8, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVl (res, res_ref, 8))
+ abort ();
+
+ _mm512_i64scatter_epi64 (res, idx.x, src.x, SCALE);
+ compute_scatterqq (0xFF, idx.a, src.a, SCALE, res_ref);
+
+ if (checkVl (res, res_ref, 8))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-inline-asm.c b/gcc/testsuite/gcc.target/i386/avx512f-inline-asm.c
new file mode 100644
index 00000000000..4e675e09618
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-inline-asm.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static void
+init_vpadd_mask (int* dst, int *src1, int *src2, int seed)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ dst[i] = -1;
+ src1[i] = seed * 2 * i + 1;
+ src2[i] = seed * 2 * i;
+ }
+}
+
+static inline void
+calc_vpadd_mask_zeroed (int *dst, __mmask16 m, int *src1, int *src2)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if (m & (1 << i))
+ dst[i] = src1[i] + src2[i];
+ else
+ dst[i] = 0;
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ /* Checking mask arithmetic instruction */
+
+ __mmask16 msk_dst, msk_src1, msk_src2, msk_dst_ref;
+
+ msk_src1 = 0x0FFB;
+ msk_src2 = 0x0F0F;
+
+ asm ("kandw\t%2, %1, %0"
+ : "=Yk" (msk_dst)
+ : "Yk" (msk_src1), "Yk" (msk_src2));
+
+ msk_dst_ref = _mm512_kand (msk_src1, msk_src2);
+ if (msk_dst != msk_dst_ref)
+ abort ();
+
+
+ /* Checking zero-masked vector instruction */
+ union512i_d dst, src1, src2;
+ int dst_ref[16];
+
+ init_vpadd_mask (dst.a, src1.a, src2.a, 1);
+ init_vpadd_mask (dst_ref, src1.a, src2.a, 1);
+
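+  /* In the template below %{ and %} print literal braces, so the insn gets a "{%k}{z}" suffix and is zero-masked by msk_dst.  */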
+ asm ("vpaddd\t%2, %1, %0 %{%3%}%{z%}"
+ : "=x" (dst.x)
+ : "x" (src1.x), "x" (src2.x), "k" (msk_dst));
+
+ calc_vpadd_mask_zeroed (dst_ref, msk_dst, src1.a, src2.a);
+
+ if (check_union512i_d (dst, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c
new file mode 100644
index 00000000000..3d777c83015
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kandnw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kandn (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kandw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kandw-1.c
new file mode 100644
index 00000000000..19a3cf4dbc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kandw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kandw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kand (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-klogic-2.c b/gcc/testsuite/gcc.target/i386/avx512f-klogic-2.c
new file mode 100644
index 00000000000..df7fc9b7b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-klogic-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test (void)
+{
+ __mmask16 dst, src1, src2, dst_ref;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (src1) : "r" (0x0FFF) );
+ __asm__( "kmovw %1, %0" : "=k" (src2) : "r" (0x0F0F) );
+
+ dst = _mm512_kand (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = src1 & src2;
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_kandn (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = ~src1 & src2;
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_kor (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = src1 | src2;
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_kxnor (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = ~(src1 ^ src2);
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_kxor (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = src1 ^ src2;
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_knot (src1);
+ x = _mm512_mask_add_ps (x, dst, x, x);
+ dst_ref = ~src1;
+ if (dst != dst_ref)
+ abort ();
+
+ dst = _mm512_kunpackb (src1, src2);
+ x = _mm512_mask_add_ps (x, dst, x, x);
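+  /* kunpackb packs the low bytes of its operands: dst = (src1[7:0] << 8) | src2[7:0], here 0xFF0F.  */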
+ dst_ref = 0xFF0F;
+
+ if (dst != dst_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-knotw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-knotw-1.c
new file mode 100644
index 00000000000..a8f8f10b6be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-knotw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "knotw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (45) );
+
+ k2 = _mm512_knot (k1);
+
+ x = _mm512_mask_add_ps (x, k1, x, x);
+ x = _mm512_mask_add_ps (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kortestw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kortestw-1.c
new file mode 100644
index 00000000000..a3cdd4a1ab7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kortestw-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-final { scan-assembler-times "kortestw\[ \\t\]+\[^\n\]*%k\[0-7\]" 4 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test () {
+ volatile __mmask16 k1;
+ __mmask16 k2;
+ volatile __mmask8 k3;
+ __mmask8 k4;
+
+ volatile short r;
+
+ /* Check that appropriate insn sequence is generated at -O0. */
+ r = _mm512_kortestc (k1, k2);
+ r = _mm512_kortestz (k1, k2);
+
+ r = _mm512_kortestc (k3, k4);
+ r = _mm512_kortestz (k3, k4);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kortestw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kortestw-2.c
new file mode 100644
index 00000000000..4b9cadcc2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kortestw-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void
+avx512f_test () {
+ volatile __mmask16 k1;
+ __mmask16 k2;
+ volatile short r = 0;
+
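+  /* kortestc returns 1 iff (k1 | k2) is all ones; kortestz returns 1 iff (k1 | k2) is zero.  */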
+ /* Test kortestc. */
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (45) );
+
+ r += _mm512_kortestc (k1, k2);
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (0) );
+
+ r += _mm512_kortestc (k1, k2);
+ if (r)
+ abort ();
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (-1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (0) );
+
+ r += _mm512_kortestc (k1, k2);
+ if (!r)
+ abort ();
+
+ r = 0;
+ /* Test kortestz. */
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (45) );
+
+ r += _mm512_kortestz (k1, k2);
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (-1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (0) );
+
+ r += _mm512_kortestz (k1, k2);
+ if (r)
+ abort ();
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (0) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (0) );
+
+ r += _mm512_kortestz (k1, k2);
+ if (!r)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-korw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-korw-1.c
new file mode 100644
index 00000000000..96f837b96b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-korw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "korw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kor (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-1.c
new file mode 100644
index 00000000000..bc55f8b301c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kunpckbw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test () {
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kunpackb (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kxnorw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kxnorw-1.c
new file mode 100644
index 00000000000..8b12b2ac896
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kxnorw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kxnorw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kxnor (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kxorw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kxorw-1.c
new file mode 100644
index 00000000000..7ae1bc46204
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kxorw-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kxorw\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test ()
+{
+ __mmask16 k1, k2, k3;
+ volatile __m512 x;
+
+ __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kxor (k1, k2);
+ x = _mm512_mask_add_ps (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h b/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h
new file mode 100644
index 00000000000..53c439e24d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h
@@ -0,0 +1,9 @@
+/* Type of mask. */
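+/* SIZE is defined by the including test to the number of vector elements.  */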
+#if SIZE <= 8
+#define MASK_TYPE __mmask8
+#define MASK_VALUE 0xB9
+#elif SIZE <= 16
+#define MASK_TYPE __mmask16
+#define MASK_VALUE 0xA6BA
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-os-support.h b/gcc/testsuite/gcc.target/i386/avx512f-os-support.h
new file mode 100644
index 00000000000..deefa5e1105
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-os-support.h
@@ -0,0 +1,12 @@
+/* Check if the OS supports executing AVX512F instructions. */
+
+static int
+avx512f_os_support (void)
+{
+ unsigned int eax, edx;
+
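+  /* XCR0 bits 1-2 (SSE, AVX) and 5-7 (opmask, upper halves of
+     ZMM0-15, ZMM16-31) must all be set; their mask is 0xe6 == 230.  */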
+ __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+ return (eax & 230) == 230;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-rounding.c b/gcc/testsuite/gcc.target/i386/avx512f-rounding.c
new file mode 100644
index 00000000000..254e3a418f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-rounding.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512f" } */
+
+#include <x86intrin.h>
+
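+/* The rounding operand must be an immediate naming a valid rounding
+   mode; passing a plain variable is diagnosed below.  */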
+int
+test_rounding (__m128d x, int r)
+{
+ return _mm_cvt_roundsd_i32 (x, r); /* { dg-error "incorrect rounding operand." } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-1.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-1.c
new file mode 100644
index 00000000000..0ae82bc4138
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-1.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
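+/* _mm512_set_ps takes elements highest index first and _mm512_setr_ps lowest index first, so foo and foo_r build the same vector.  */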
+static __m512
+__attribute__ ((noinline))
+foo (float *v)
+{
+ return _mm512_set_ps (v[15], v[14], v[13], v[12],
+ v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4],
+ v[3], v[2], v[1], v[0]);
+}
+
+static __m512
+__attribute__ ((noinline))
+foo_r (float *v)
+{
+ return _mm512_setr_ps (v[0], v[1], v[2], v[3],
+ v[4], v[5], v[6], v[7],
+ v[8], v[9], v[10], v[11],
+ v[12], v[13], v[14], v[15]);
+}
+
+static void
+avx512f_test (void)
+{
+ float v[16] = { -3.3, 2.6, 1.48, 9.104, -23.9, 17, -13.48, 4,
+ 69.78, 0.33, 81, 0.4, -8.9, -173.37, 0.8, 68 };
+ union512 res;
+
+ res.x = foo (v);
+
+ if (check_union512 (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_ps ();
+
+ res.x = foo_r (v);
+
+ if (check_union512 (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-2.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-2.c
new file mode 100644
index 00000000000..1884c2f334f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512
+__attribute__ ((noinline))
+foo (float x1, float x2, float x3, float x4,
+ float x5, float x6, float x7, float x8,
+ float x9, float x10, float x11, float x12,
+ float x13, float x14, float x15, float x16)
+{
+ return _mm512_set_ps (x1, x2, x3, x4, x5, x6, x7, x8,
+ x9, x10, x11, x12, x13, x14, x15, x16);
+}
+
+static __m512
+__attribute__ ((noinline))
+foo_r (float x1, float x2, float x3, float x4,
+ float x5, float x6, float x7, float x8,
+ float x9, float x10, float x11, float x12,
+ float x13, float x14, float x15, float x16)
+{
+ return _mm512_setr_ps (x16, x15, x14, x13, x12, x11, x10, x9,
+ x8, x7, x6, x5, x4, x3, x2, x1);
+}
+
+static void
+avx512f_test (void)
+{
+ float v[16] = { -3.3, 2.6, 1.48, 9.104, -23.9, 17, -13.48, 4,
+ 69.78, 0.33, 81, 0.4, -8.9, -173.37, 0.8, 68 };
+ union512 res;
+
+ res.x = foo (v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512 (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_ps ();
+
+ res.x = foo_r (v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512 (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-3.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-3.c
new file mode 100644
index 00000000000..7ec166a5886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-3.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512
+__attribute__ ((noinline))
+foo (float x)
+{
+ return _mm512_set_ps (x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x);
+}
+
+static __m512
+__attribute__ ((noinline))
+foo_r (float x)
+{
+ return _mm512_setr_ps (x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x);
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ float e = 34.5;
+ float v[16];
+ union512 res;
+
+ for (i = 0; i < 16; i++)
+ v[i] = e;
+
+ res.x = foo (e);
+
+ if (check_union512 (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_ps ();
+
+ res.x = foo_r (e);
+
+ if (check_union512 (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-4.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-4.c
new file mode 100644
index 00000000000..cd37e006450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-4.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512
+__attribute__ ((noinline))
+foo (float x, int i)
+{
+ switch (i)
+ {
+ case 15:
+ return _mm512_set_ps (x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 14:
+ return _mm512_set_ps (0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 13:
+ return _mm512_set_ps (0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 12:
+ return _mm512_set_ps (0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 11:
+ return _mm512_set_ps (0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 10:
+ return _mm512_set_ps (0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 9:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 8:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 7:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0);
+ case 3:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0);
+ case 2:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0);
+ case 1:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0);
+ case 0:
+ return _mm512_set_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512
+__attribute__ ((noinline))
+foo_r (float x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_ps (x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 1:
+ return _mm512_setr_ps (0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 2:
+ return _mm512_setr_ps (0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 3:
+ return _mm512_setr_ps (0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_setr_ps (0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 7:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 8:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0);
+ case 9:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0);
+ case 10:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0);
+ case 11:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0);
+ case 12:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0);
+ case 13:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0);
+ case 14:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0);
+ case 15:
+ return _mm512_setr_ps (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ float e = -3.234;
+ float v[16];
+ union512 res;
+ int i, j;
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++)
+ v[j] = 0;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512 (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_ps ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512 (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-5.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-5.c
new file mode 100644
index 00000000000..dec7fd40a7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16sf-5.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512
+__attribute__ ((noinline))
+foo (float x, int i)
+{
+ switch (i)
+ {
+ case 15:
+ return _mm512_set_ps (x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 14:
+ return _mm512_set_ps (1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 13:
+ return _mm512_set_ps (1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 12:
+ return _mm512_set_ps (1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 11:
+ return _mm512_set_ps (1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 10:
+ return _mm512_set_ps (1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 9:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 8:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 7:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1);
+ case 3:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1);
+ case 2:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1);
+ case 1:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1);
+ case 0:
+ return _mm512_set_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512
+__attribute__ ((noinline))
+foo_r (float x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_ps (x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 1:
+ return _mm512_setr_ps (1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 2:
+ return _mm512_setr_ps (1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 3:
+ return _mm512_setr_ps (1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_setr_ps (1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 7:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 8:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1);
+ case 9:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1);
+ case 10:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1);
+ case 11:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1);
+ case 12:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1);
+ case 13:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1);
+ case 14:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1);
+ case 15:
+ return _mm512_setr_ps (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ float e = -3.234;
+ float v[16];
+ union512 res;
+ int i, j;
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++)
+ v[j] = 1;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512 (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_ps ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512 (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-1.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-1.c
new file mode 100644
index 00000000000..ebd0486999f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (int *v)
+{
+ return _mm512_set_epi32 (v[15], v[14], v[13], v[12],
+ v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4],
+ v[3], v[2], v[1], v[0]);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (int *v)
+{
+ return _mm512_setr_epi32 (v[0], v[1], v[2], v[3],
+ v[4], v[5], v[6], v[7],
+ v[8], v[9], v[10], v[11],
+ v[12], v[13], v[14], v[15]);
+}
+
+static void
+avx512f_test (void)
+{
+ int v[16] = { 19832468, 2134, 6576856, 6678,
+ 8723467, 54646, 234566, 12314,
+ 786784, 77575, 645245, 234555,
+ 9487733, 411244, 12344, 86533 };
+ union512i_d res;
+
+ res.x = foo (v);
+
+ if (check_union512i_d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (v);
+
+ if (check_union512i_d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-2.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-2.c
new file mode 100644
index 00000000000..3090a2de66c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (int x1, int x2, int x3, int x4,
+ int x5, int x6, int x7, int x8,
+ int x9, int x10, int x11, int x12,
+ int x13, int x14, int x15, int x16)
+{
+ return _mm512_set_epi32 (x1, x2, x3, x4, x5, x6, x7, x8,
+ x9, x10, x11, x12, x13, x14, x15, x16);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (int x1, int x2, int x3, int x4,
+ int x5, int x6, int x7, int x8,
+ int x9, int x10, int x11, int x12,
+ int x13, int x14, int x15, int x16)
+{
+ return _mm512_setr_epi32 (x16, x15, x14, x13, x12, x11, x10, x9,
+ x8, x7, x6, x5, x4, x3, x2, x1);
+}
+
+static void
+avx512f_test (void)
+{
+ int v[16] = { -3, -453, 2, -231, 1, -111, 9, -145,
+ 23, 671, -173, 166, -13, 714, 69, 123 };
+ union512i_d res;
+
+ res.x = foo (v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512i_d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
+ v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512i_d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-3.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-3.c
new file mode 100644
index 00000000000..c02838ec349
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-3.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (int x)
+{
+ return _mm512_set_epi32 (x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (int x)
+{
+ return _mm512_setr_epi32 (x, x, x, x, x, x, x, x,
+ x, x, x, x, x, x, x, x);
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ int e = 0xabadbeef;
+ int v[16];
+ union512i_d res;
+
+ for (i = 0; i < 16; i++)
+ v[i] = e;
+
+ res.x = foo (e);
+
+ if (check_union512i_d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e);
+
+ if (check_union512i_d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-4.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-4.c
new file mode 100644
index 00000000000..a16f6f06852
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-4.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (int x, int i)
+{
+ switch (i)
+ {
+ case 15:
+ return _mm512_set_epi32 (x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 14:
+ return _mm512_set_epi32 (0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 13:
+ return _mm512_set_epi32 (0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 12:
+ return _mm512_set_epi32 (0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 11:
+ return _mm512_set_epi32 (0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 10:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 9:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 8:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 7:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0);
+ case 3:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0);
+ case 2:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0);
+ case 1:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0);
+ case 0:
+ return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (int x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_epi32 (x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 1:
+ return _mm512_setr_epi32 (0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 2:
+ return _mm512_setr_epi32 (0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 3:
+ return _mm512_setr_epi32 (0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_setr_epi32 (0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 7:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0, 0);
+ case 8:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0, 0);
+ case 9:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0, 0);
+ case 10:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0, 0);
+ case 11:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0, 0);
+ case 12:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0, 0);
+ case 13:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 0);
+ case 14:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x, 0);
+ case 15:
+ return _mm512_setr_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int e = 0xabadbeef;
+ int v[16];
+ union512i_d res;
+ int i, j;
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++)
+ v[j] = 0;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512i_d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512i_d (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-5.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-5.c
new file mode 100644
index 00000000000..948d4ed42f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v16si-5.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (int x, int i)
+{
+ switch (i)
+ {
+ case 15:
+ return _mm512_set_epi32 (x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 14:
+ return _mm512_set_epi32 (1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 13:
+ return _mm512_set_epi32 (1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 12:
+ return _mm512_set_epi32 (1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 11:
+ return _mm512_set_epi32 (1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 10:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 9:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 8:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 7:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1);
+ case 3:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1);
+ case 2:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1);
+ case 1:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1);
+ case 0:
+ return _mm512_set_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (int x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_epi32 (x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 1:
+ return _mm512_setr_epi32 (1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 2:
+ return _mm512_setr_epi32 (1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 3:
+ return _mm512_setr_epi32 (1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_setr_epi32 (1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 7:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1, 1);
+ case 8:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1, 1);
+ case 9:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, 1);
+ case 10:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1);
+ case 11:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1, 1);
+ case 12:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1, 1);
+ case 13:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1, 1);
+ case 14:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x, 1);
+ case 15:
+ return _mm512_setr_epi32 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ int e = 0xabadbeef;
+ int v[16];
+ union512i_d res;
+ int i, j;
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++)
+ v[j] = 1;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512i_d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512i_d (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-1.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-1.c
new file mode 100644
index 00000000000..a3514ef7271
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512d
+__attribute__ ((noinline))
+foo (double *v)
+{
+ return _mm512_set_pd (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+}
+
+static __m512d
+__attribute__ ((noinline))
+foo_r (double *v)
+{
+ return _mm512_setr_pd (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+}
+
+static void
+avx512f_test (void)
+{
+ double v[8] = { -3.3, 2.6, 1.48, 9.104, -23.9, -173.37, -13.48, 69.78 };
+ union512d res;
+
+ res.x = foo (v);
+
+ if (check_union512d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_pd ();
+
+ res.x = foo_r (v);
+
+ if (check_union512d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-2.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-2.c
new file mode 100644
index 00000000000..a412de58207
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512d
+__attribute__ ((noinline))
+foo (double x1, double x2, double x3, double x4,
+ double x5, double x6, double x7, double x8)
+{
+ return _mm512_set_pd (x1, x2, x3, x4, x5, x6, x7, x8);
+}
+
+static __m512d
+__attribute__ ((noinline))
+foo_r (double x1, double x2, double x3, double x4,
+ double x5, double x6, double x7, double x8)
+{
+ return _mm512_setr_pd (x8, x7, x6, x5, x4, x3, x2, x1);
+}
+
+static void
+avx512f_test (void)
+{
+ double v[8] = { -3.3, 2.6, 1.48, 9.104, -23.9, -173.37, -13.48, 69.78 };
+ union512d res;
+
+ res.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_pd ();
+
+ res.x = foo_r (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-3.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-3.c
new file mode 100644
index 00000000000..751af670378
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-3.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512d
+__attribute__ ((noinline))
+foo (double x)
+{
+ return _mm512_set_pd (x, x, x, x, x, x, x, x);
+}
+
+static __m512d
+__attribute__ ((noinline))
+foo_r (double x)
+{
+ return _mm512_setr_pd (x, x, x, x, x, x, x, x);
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ double e = 34.5;
+ double v[8];
+ union512d res;
+
+ for (i = 0; i < 8; i++)
+ v[i] = e;
+
+ res.x = foo (e);
+
+ if (check_union512d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_pd ();
+
+ res.x = foo_r (e);
+
+ if (check_union512d (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-4.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-4.c
new file mode 100644
index 00000000000..f62bb5fa065
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-4.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512d
+__attribute__ ((noinline))
+foo (double x, int i)
+{
+ switch (i)
+ {
+ case 7:
+ return _mm512_set_pd (x, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_set_pd (0, x, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_set_pd (0, 0, x, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_set_pd (0, 0, 0, x, 0, 0, 0, 0);
+ case 3:
+ return _mm512_set_pd (0, 0, 0, 0, x, 0, 0, 0);
+ case 2:
+ return _mm512_set_pd (0, 0, 0, 0, 0, x, 0, 0);
+ case 1:
+ return _mm512_set_pd (0, 0, 0, 0, 0, 0, x, 0);
+ case 0:
+ return _mm512_set_pd (0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512d
+__attribute__ ((noinline))
+foo_r (double x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_pd (x, 0, 0, 0, 0, 0, 0, 0);
+ case 1:
+ return _mm512_setr_pd (0, x, 0, 0, 0, 0, 0, 0);
+ case 2:
+ return _mm512_setr_pd (0, 0, x, 0, 0, 0, 0, 0);
+ case 3:
+ return _mm512_setr_pd (0, 0, 0, x, 0, 0, 0, 0);
+ case 4:
+ return _mm512_setr_pd (0, 0, 0, 0, x, 0, 0, 0);
+ case 5:
+ return _mm512_setr_pd (0, 0, 0, 0, 0, x, 0, 0);
+ case 6:
+ return _mm512_setr_pd (0, 0, 0, 0, 0, 0, x, 0);
+ case 7:
+ return _mm512_setr_pd (0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ double e = -3.234;
+ double v[8];
+ union512d res;
+ int i, j;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ v[j] = 0;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_pd ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512d (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-5.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-5.c
new file mode 100644
index 00000000000..c6abd82da04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8df-5.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512d
+__attribute__ ((noinline))
+foo (double x, int i)
+{
+ switch (i)
+ {
+ case 7:
+ return _mm512_set_pd (x, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_set_pd (1, x, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_set_pd (1, 1, x, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_set_pd (1, 1, 1, x, 1, 1, 1, 1);
+ case 3:
+ return _mm512_set_pd (1, 1, 1, 1, x, 1, 1, 1);
+ case 2:
+ return _mm512_set_pd (1, 1, 1, 1, 1, x, 1, 1);
+ case 1:
+ return _mm512_set_pd (1, 1, 1, 1, 1, 1, x, 1);
+ case 0:
+ return _mm512_set_pd (1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512d
+__attribute__ ((noinline))
+foo_r (double x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_pd (x, 1, 1, 1, 1, 1, 1, 1);
+ case 1:
+ return _mm512_setr_pd (1, x, 1, 1, 1, 1, 1, 1);
+ case 2:
+ return _mm512_setr_pd (1, 1, x, 1, 1, 1, 1, 1);
+ case 3:
+ return _mm512_setr_pd (1, 1, 1, x, 1, 1, 1, 1);
+ case 4:
+ return _mm512_setr_pd (1, 1, 1, 1, x, 1, 1, 1);
+ case 5:
+ return _mm512_setr_pd (1, 1, 1, 1, 1, x, 1, 1);
+ case 6:
+ return _mm512_setr_pd (1, 1, 1, 1, 1, 1, x, 1);
+ case 7:
+ return _mm512_setr_pd (1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ double e = -3.234;
+ double v[8];
+ union512d res;
+ int i, j;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ v[j] = 1;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512d (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_pd ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512d (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-1.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-1.c
new file mode 100644
index 00000000000..8cb1f8f61b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (long long *v)
+{
+ return _mm512_set_epi64 (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (long long *v)
+{
+ return _mm512_setr_epi64 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+}
+
+static void
+avx512f_test (void)
+{
+ long long v[8] = { 0x12e9e94645ad8LL, 0x851c0b39446LL, 2134, 6678,
+ 0x786784645245LL, 0x9487731234LL, 41124, 86530 };
+ union512i_q res;
+
+ res.x = foo (v);
+
+ if (check_union512i_q (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (v);
+
+ if (check_union512i_q (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-2.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-2.c
new file mode 100644
index 00000000000..fd033ce24e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (long long x1, long long x2, long long x3, long long x4,
+ long long x5, long long x6, long long x7, long long x8)
+{
+ return _mm512_set_epi64 (x1, x2, x3, x4, x5, x6, x7, x8);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (long long x1, long long x2, long long x3, long long x4,
+ long long x5, long long x6, long long x7, long long x8)
+{
+ return _mm512_setr_epi64 (x8, x7, x6, x5, x4, x3, x2, x1);
+}
+
+static void
+avx512f_test (void)
+{
+ long long v[8] = { 0x12e9e94645ad8LL, 0x851c0b39446LL, 2134, 6678,
+ 0x786784645245LL, 0x9487731234LL, 41124, 86530 };
+ union512i_q res;
+
+ res.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512i_q (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+ if (check_union512i_q (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-3.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-3.c
new file mode 100644
index 00000000000..16e12c7f1a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-3.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (long long x)
+{
+ return _mm512_set_epi64 (x, x, x, x, x, x, x, x);
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (long long x)
+{
+ return _mm512_setr_epi64 (x, x, x, x, x, x, x, x);
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ long long e = 0xfed178ab134badf1LL;
+ long long v[8];
+ union512i_q res;
+
+ for (i = 0; i < 8; i++)
+ v[i] = e;
+
+ res.x = foo (e);
+
+ if (check_union512i_q (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e);
+
+ if (check_union512i_q (res, v))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-4.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-4.c
new file mode 100644
index 00000000000..ea6421fcc03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-4.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (long long x, int i)
+{
+ switch (i)
+ {
+ case 7:
+ return _mm512_set_epi64 (x, 0, 0, 0, 0, 0, 0, 0);
+ case 6:
+ return _mm512_set_epi64 (0, x, 0, 0, 0, 0, 0, 0);
+ case 5:
+ return _mm512_set_epi64 (0, 0, x, 0, 0, 0, 0, 0);
+ case 4:
+ return _mm512_set_epi64 (0, 0, 0, x, 0, 0, 0, 0);
+ case 3:
+ return _mm512_set_epi64 (0, 0, 0, 0, x, 0, 0, 0);
+ case 2:
+ return _mm512_set_epi64 (0, 0, 0, 0, 0, x, 0, 0);
+ case 1:
+ return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, x, 0);
+ case 0:
+ return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (long long x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_epi64 (x, 0, 0, 0, 0, 0, 0, 0);
+ case 1:
+ return _mm512_setr_epi64 (0, x, 0, 0, 0, 0, 0, 0);
+ case 2:
+ return _mm512_setr_epi64 (0, 0, x, 0, 0, 0, 0, 0);
+ case 3:
+ return _mm512_setr_epi64 (0, 0, 0, x, 0, 0, 0, 0);
+ case 4:
+ return _mm512_setr_epi64 (0, 0, 0, 0, x, 0, 0, 0);
+ case 5:
+ return _mm512_setr_epi64 (0, 0, 0, 0, 0, x, 0, 0);
+ case 6:
+ return _mm512_setr_epi64 (0, 0, 0, 0, 0, 0, x, 0);
+ case 7:
+ return _mm512_setr_epi64 (0, 0, 0, 0, 0, 0, 0, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ long long e = 0xabadbeef01234567LL;
+ long long v[8];
+ union512i_q res;
+ int i, j;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ v[j] = 0;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512i_q (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512i_q (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-5.c b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-5.c
new file mode 100644
index 00000000000..76ec4438897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-set-v8di-5.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static __m512i
+__attribute__ ((noinline))
+foo (long long x, int i)
+{
+ switch (i)
+ {
+ case 7:
+ return _mm512_set_epi64 (x, 1, 1, 1, 1, 1, 1, 1);
+ case 6:
+ return _mm512_set_epi64 (1, x, 1, 1, 1, 1, 1, 1);
+ case 5:
+ return _mm512_set_epi64 (1, 1, x, 1, 1, 1, 1, 1);
+ case 4:
+ return _mm512_set_epi64 (1, 1, 1, x, 1, 1, 1, 1);
+ case 3:
+ return _mm512_set_epi64 (1, 1, 1, 1, x, 1, 1, 1);
+ case 2:
+ return _mm512_set_epi64 (1, 1, 1, 1, 1, x, 1, 1);
+ case 1:
+ return _mm512_set_epi64 (1, 1, 1, 1, 1, 1, x, 1);
+ case 0:
+ return _mm512_set_epi64 (1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static __m512i
+__attribute__ ((noinline))
+foo_r (long long x, int i)
+{
+ switch (i)
+ {
+ case 0:
+ return _mm512_setr_epi64 (x, 1, 1, 1, 1, 1, 1, 1);
+ case 1:
+ return _mm512_setr_epi64 (1, x, 1, 1, 1, 1, 1, 1);
+ case 2:
+ return _mm512_setr_epi64 (1, 1, x, 1, 1, 1, 1, 1);
+ case 3:
+ return _mm512_setr_epi64 (1, 1, 1, x, 1, 1, 1, 1);
+ case 4:
+ return _mm512_setr_epi64 (1, 1, 1, 1, x, 1, 1, 1);
+ case 5:
+ return _mm512_setr_epi64 (1, 1, 1, 1, 1, x, 1, 1);
+ case 6:
+ return _mm512_setr_epi64 (1, 1, 1, 1, 1, 1, x, 1);
+ case 7:
+ return _mm512_setr_epi64 (1, 1, 1, 1, 1, 1, 1, x);
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ long long e = 0xabadbeef01234567LL;
+ long long v[8];
+ union512i_q res;
+ int i, j;
+
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ v[j] = 1;
+ v[i] = e;
+
+ res.x = foo (e, i);
+
+ if (check_union512i_q (res, v))
+ abort ();
+
+ res.x = _mm512_setzero_si512 ();
+
+ res.x = foo_r (e, i);
+
+ if (check_union512i_q (res, v))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-setzero-pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-setzero-pd-1.c
new file mode 100644
index 00000000000..f0589bd18a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-setzero-pd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union512d res;
+ double res_ref[8];
+
+ res.x = _mm512_setzero_pd ();
+
+ for (i = 0; i < 8; i++)
+ res_ref[i] = 0.0;
+
+ if (check_union512d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-setzero-ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-setzero-ps-1.c
new file mode 100644
index 00000000000..5b1ee29e340
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-setzero-ps-1.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union512 res;
+ float res_ref[16];
+
+ res.x = _mm512_setzero_ps ();
+
+ for (i = 0; i < 16; i++)
+ res_ref[i] = 0.0;
+
+ if (check_union512 (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-setzero-si512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-setzero-si512-1.c
new file mode 100644
index 00000000000..1c60489b4fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-setzero-si512-1.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union512i_q res;
+ long long res_ref[8];
+
+ res.x = _mm512_setzero_si512 ();
+
+ for (i = 0; i < 8; i++)
+ res_ref[i] = 0;
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+}
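
The three setzero tests check the same 512 zero bits through different element views. A sketch of that connection, assuming the usual AVX-512F cast intrinsics (which reinterpret bits and generate no code):

    #include <immintrin.h>

    static void
    zeros_agree (__m512d *zd, __m512 *zs)
    {
      __m512i zi = _mm512_setzero_si512 ();
      *zd = _mm512_castsi512_pd (zi);  /* same value as _mm512_setzero_pd ()  */
      *zs = _mm512_castsi512_ps (zi);  /* same value as _mm512_setzero_ps ()  */
    }
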
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-1.c
new file mode 100644
index 00000000000..8e37fec8e3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_add_pd (x, x);
+ x = _mm512_mask_add_pd (x, m, x, x);
+ x = _mm512_maskz_add_pd (m, x, x);
+ x = _mm512_add_round_pd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_add_round_pd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_add_round_pd (m, x, x, _MM_FROUND_TO_ZERO);
+}
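
The scan-assembler patterns in this and the following -1.c compile tests match the three decorations AVX-512 adds in AT&T syntax: a write-mask register in braces, an optional {z} selecting zero-masking instead of merge-masking, and an embedded rounding operand such as {rn-sae}. Hand-written representative shapes, not compiler output:

    /* vaddpd  %zmm2, %zmm1, %zmm0                    unmasked
       vaddpd  %zmm2, %zmm1, %zmm0{%k1}               merge-masking
       vaddpd  %zmm2, %zmm1, %zmm0{%k1}{z}            zero-masking
       vaddpd  {rd-sae}, %zmm2, %zmm1, %zmm0{%k1}     masked, round toward -inf  */
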
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-2.c
new file mode 100644
index 00000000000..ac33c7d00ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddpd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
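
MASK_MERGE and MASK_ZERO rewrite the reference array so it matches what merge- and zero-masking leave in lanes whose mask bit is clear. Their real definitions live in avx512f-mask-type.h, which this patch hunk does not show; the following is a hypothetical sketch of their shape, not the header's text (DEFAULT_VALUE stands for the filler the test stores into res2 before the masked call):

    #include <immintrin.h>

    #ifndef DEFAULT_VALUE
    #define DEFAULT_VALUE 117.  /* placeholder; the harness defines the real one */
    #endif

    /* Merge-masking keeps the old destination lane where the mask bit
       is 0 -- here that lane held DEFAULT_VALUE -- so reset the
       reference to it; zero-masking clears the lane instead.  */
    static void
    mask_merge_d (double *r, __mmask8 mask, int size)
    {
      int i;
      for (i = 0; i < size; i++)
        if (!((mask >> i) & 1))
          r[i] = DEFAULT_VALUE;
    }

    static void
    mask_zero_d (double *r, __mmask8 mask, int size)
    {
      int i;
      for (i = 0; i < size; i++)
        if (!((mask >> i) & 1))
          r[i] = 0.0;
    }
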
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddps-1.c
new file mode 100644
index 00000000000..648fe486888
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_add_ps (x, x);
+ x = _mm512_mask_add_ps (x, m, x, x);
+ x = _mm512_maskz_add_ps (m, x, x);
+ x = _mm512_add_round_ps (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_add_round_ps (x, m, x, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_add_round_ps (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddps-2.c
new file mode 100644
index 00000000000..c0e5994d1f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddps-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c
new file mode 100644
index 00000000000..584005d64e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_add_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_add_sd (m, x1, x2);
+ x1 = _mm_add_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_add_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_add_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c
new file mode 100644
index 00000000000..7ba10b17ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddsd-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vaddsd (double *s1, double *s2, double *r)
+{
+ r[0] = s1[0] + s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, -4.5);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_add_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_add_sd (mask, s1.x, s2.x);
+
+ compute_vaddsd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+}
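
compute_vaddsd encodes the scalar masking rules: only lane 0 is under mask control, and the upper lane always comes from the first source operand; that is also why MASK_MERGE and MASK_ZERO are applied with a size of 1 here. In comment form:

    /* Scalar masked add, lane by lane (sketch):
         dst[0] = (k & 1) ? a[0] + b[0] : fallback;
           fallback is src[0] for _mm_mask_add_sd (merge),
                       0.0    for _mm_maskz_add_sd (zero);
         dst[1] = a[1];  always the first source, never masked.  */
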
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c
new file mode 100644
index 00000000000..b1f8b85cc5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_add_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_add_ss (m, x1, x2);
+ x1 = _mm_add_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_add_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_add_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c
new file mode 100644
index 00000000000..13cc360701f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vaddss-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vaddss (float *s1, float *s2, float *r)
+{
+ r[0] = s1[0] + s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.22, -333.33, 444.44, -4.56);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_add_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_add_ss (mask, s1.x, s2.x);
+
+ compute_vaddss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-valignd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-valignd-1.c
new file mode 100644
index 00000000000..693adb0577f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-valignd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __mmask16 m1;
+
+void extern
+avx512f_test (void)
+{
+ z = _mm512_alignr_epi32 (z, z, 3);
+ z = _mm512_mask_alignr_epi32 (z, m1, z, z, 3);
+ z = _mm512_maskz_alignr_epi32 (m1, z, z, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-valignd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-valignd-2.c
new file mode 100644
index 00000000000..2c21ab4166a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-valignd-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#define N (SIZE / 2)
+
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ int i;
+ int s[2 * SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s[i] = s2[i];
+ s[i + SIZE] = s1[i];
+ }
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s[i + N];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 2 * i;
+ s2.a[i] = i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_alignr_epi32) (s1.x, s2.x, N);
+ res2.x = INTRINSIC (_mask_alignr_epi32) (res2.x, mask, s1.x, s2.x, N);
+ res3.x = INTRINSIC (_maskz_alignr_epi32) (mask, s1.x, s2.x, N);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
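
CALC is the scalar model of valignd: concatenate the two sources with the second operand in the low half, then extract SIZE consecutive elements starting at offset N. Fixed at 512 bits, with the macros expanded, the model reads:

    /* _mm512_alignr_epi32 (a, b, n): view b:a as one 32-element array
       with b in elements 0..15, then return elements n .. n+15.  */
    static void
    alignd_model_512 (const int *a, const int *b, int n, int *dst)
    {
      int i, s[32];
      for (i = 0; i < 16; i++)
        {
          s[i] = b[i];          /* second operand supplies the low half */
          s[i + 16] = a[i];
        }
      for (i = 0; i < 16; i++)
        dst[i] = s[i + n];
    }

The valignq tests that follow are the 64-bit analogue with eight lanes.
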
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-valignq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-valignq-1.c
new file mode 100644
index 00000000000..a72946837a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-valignq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __mmask8 m1;
+
+void extern
+avx512f_test (void)
+{
+ z = _mm512_alignr_epi64 (z, z, 3);
+ z = _mm512_mask_alignr_epi64 (z, m1, z, z, 3);
+ z = _mm512_maskz_alignr_epi64 (m1, z, z, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-valignq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-valignq-2.c
new file mode 100644
index 00000000000..e1c2f6bb665
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-valignq-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define N (SIZE / 2)
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ int i;
+ long long s[2 * SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s[i] = s2[i];
+ s[i + SIZE] = s1[i];
+ }
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s[i + N];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 2 * i;
+ s2.a[i] = i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_alignr_epi64) (s1.x, s2.x, N);
+ res2.x = INTRINSIC (_mask_alignr_epi64) (res2.x, mask, s1.x, s2.x, N);
+ res3.x = INTRINSIC (_maskz_alignr_epi64) (mask, s1.x, s2.x, N);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-1.c
new file mode 100644
index 00000000000..cb0e4c2504f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "(vblendmpd|vmovapd)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_blend_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-2.c
new file mode 100644
index 00000000000..fa4d7360d91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vblendmpd-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (double *r, double *s1, double *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1LL << i)) ? s2[i] : s1[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-1.c
new file mode 100644
index 00000000000..faee9955b64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "(vblendmps|vmovaps)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_blend_ps (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-2.c
new file mode 100644
index 00000000000..292f73b63d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vblendmps-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s1, float *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1 << i)) ? s2[i] : s1[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+}
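
The blend compile tests accept either vblendmpd/vblendmps or a masked vmovapd/vmovaps because a blend under mask m is the same operation as a merge-masked register move, and the compiler may emit either. A sketch of the equivalence using the AVX-512F mask-move intrinsic:

    #include <immintrin.h>

    /* Both results pick s2[i] where bit i of m is set, s1[i] otherwise.  */
    static void
    blend_two_spellings (__mmask8 m, __m512d s1, __m512d s2,
                         __m512d *r1, __m512d *r2)
    {
      *r1 = _mm512_mask_blend_pd (m, s1, s2);
      *r2 = _mm512_mask_mov_pd (s1, m, s2);  /* selects the same lanes */
    }
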
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-1.c
new file mode 100644
index 00000000000..2af23f11dbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m128 y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcast_f32x4 (y);
+ x = _mm512_mask_broadcast_f32x4 (x, m, y);
+ x = _mm512_maskz_broadcast_f32x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-2.c
new file mode 100644
index 00000000000..3659a86f1c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf32x4-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 4];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3;
+ UNION_TYPE (128,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_f32x4) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_f32x4) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_f32x4) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-1.c
new file mode 100644
index 00000000000..dbc3967ccba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256d y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcast_f64x4 (y);
+ x = _mm512_mask_broadcast_f64x4 (x, m, y);
+ x = _mm512_maskz_broadcast_f64x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-2.c
new file mode 100644
index 00000000000..256d12b4f09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastf64x4-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (double *r, double *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 4];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ UNION_TYPE (256, d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)

+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_f64x4) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_f64x4) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_f64x4) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-1.c
new file mode 100644
index 00000000000..743e1cbcc87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcast_i32x4 (y);
+ x = _mm512_mask_broadcast_i32x4 (x, m, y);
+ x = _mm512_maskz_broadcast_i32x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-2.c
new file mode 100644
index 00000000000..c2288d337b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti32x4-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (int *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 4];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (128, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_i32x4) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_i32x4) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_i32x4) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-1.c
new file mode 100644
index 00000000000..28a50ed8ccd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcast_i64x4 (y);
+ x = _mm512_mask_broadcast_i64x4 (x, m, y);
+ x = _mm512_maskz_broadcast_i64x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-2.c
new file mode 100644
index 00000000000..55a14743843
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcasti64x4-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (long long *r, long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 4];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (256, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_i64x4) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_i64x4) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_i64x4) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-1.c
new file mode 100644
index 00000000000..3d261afea75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m128d y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastsd_pd (y);
+ x = _mm512_mask_broadcastsd_pd (x, m, y);
+ x = _mm512_maskz_broadcastsd_pd (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-2.c
new file mode 100644
index 00000000000..f0e204746cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (double *r, double *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ UNION_TYPE (128, d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 2; i++)
+ {
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcastsd_pd) (src.x);
+ res2.x = INTRINSIC (_mask_broadcastsd_pd) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastsd_pd) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-1.c
new file mode 100644
index 00000000000..4cc8cb78714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m128 y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastss_ps (y);
+ x = _mm512_mask_broadcastss_ps (x, m, y);
+ x = _mm512_maskz_broadcastss_ps (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-2.c
new file mode 100644
index 00000000000..8c790ab22fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastss-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3;
+ UNION_TYPE (128,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcastss_ps) (src.x);
+ res2.x = INTRINSIC (_mask_broadcastss_ps) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastss_ps) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
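
The CALC bodies above spell out what each broadcast replicates into the 512-bit destination. In comment form:

    /* Replication patterns checked by these tests:
         _mm512_broadcast_f32x4 / _i32x4 : dst[i] = src128[i % 4]   (x4)
         _mm512_broadcast_f64x4 / _i64x4 : dst[i] = src256[i % 4]   (x2)
         _mm512_broadcastsd_pd  / ss_ps  : dst[i] = src[0]          (splat)  */
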
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
new file mode 100644
index 00000000000..fa3655610c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*\[^\}\]%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*\[^\}\]%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_pd_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+ m = _mm512_mask_cmp_round_pd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
new file mode 100644
index 00000000000..470d645b055
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
@@ -0,0 +1,73 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_pd(s1); \
+ source2.x = _mm512_loadu_pd(s2); \
+ dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+ UNION_TYPE (AVX512F_LEN, d) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+ double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464,
+ 231.23311, 5674.455, 111.111111, 23241.152};
+ double s2[8]={41124.234, 6678.346, 8653.65635, 856.43576,
+ 231.23311, 4646.123, 111.111111, 124.12455};
+
+ CMP(_CMP_EQ_OQ, !isunordered(s1[i], s2[i]) && s1[i] == s2[i]);
+ CMP(_CMP_LT_OS, !isunordered(s1[i], s2[i]) && s1[i] < s2[i]);
+ CMP(_CMP_LE_OS, !isunordered(s1[i], s2[i]) && s1[i] <= s2[i]);
+ CMP(_CMP_UNORD_Q, isunordered(s1[i], s2[i]));
+ CMP(_CMP_NEQ_UQ, isunordered(s1[i], s2[i]) || s1[i] != s2[i]);
+ CMP(_CMP_NLT_US, isunordered(s1[i], s2[i]) || s1[i] >= s2[i]);
+ CMP(_CMP_NLE_US, isunordered(s1[i], s2[i]) || s1[i] > s2[i]);
+ CMP(_CMP_ORD_Q, !isunordered(s1[i], s2[i]));
+
+ CMP(_CMP_EQ_UQ, isunordered(s1[i], s2[i]) || s1[i] == s2[i]);
+ CMP(_CMP_NGE_US, isunordered(s1[i], s2[i]) || s1[i] < s2[i]);
+ CMP(_CMP_NGT_US, isunordered(s1[i], s2[i]) || s1[i] <= s2[i]);
+
+ CMP(_CMP_FALSE_OQ, 0);
+ CMP(_CMP_NEQ_OQ, !isunordered(s1[i], s2[i]) && s1[i] != s2[i]);
+ CMP(_CMP_GE_OS, !isunordered(s1[i], s2[i]) && s1[i] >= s2[i]);
+ CMP(_CMP_GT_OS, !isunordered(s1[i], s2[i]) && s1[i] > s2[i]);
+ CMP(_CMP_TRUE_UQ, 1);
+
+ CMP(_CMP_EQ_OS, !isunordered(s1[i], s2[i]) && s1[i] == s2[i]);
+ CMP(_CMP_LT_OQ, !isunordered(s1[i], s2[i]) && s1[i] < s2[i]);
+ CMP(_CMP_LE_OQ, !isunordered(s1[i], s2[i]) && s1[i] <= s2[i]);
+ CMP(_CMP_UNORD_S, isunordered(s1[i], s2[i]));
+ CMP(_CMP_NEQ_US, isunordered(s1[i], s2[i]) || s1[i] != s2[i]);
+ CMP(_CMP_NLT_UQ, isunordered(s1[i], s2[i]) || s1[i] >= s2[i]);
+ CMP(_CMP_NLE_UQ, isunordered(s1[i], s2[i]) || s1[i] > s2[i]);
+ CMP(_CMP_ORD_S, !isunordered(s1[i], s2[i]));
+ CMP(_CMP_EQ_US, isunordered(s1[i], s2[i]) || s1[i] == s2[i]);
+ CMP(_CMP_NGE_UQ, isunordered(s1[i], s2[i]) || s1[i] < s2[i]);
+ CMP(_CMP_NGT_UQ, isunordered(s1[i], s2[i]) || s1[i] <= s2[i]);
+ CMP(_CMP_FALSE_OS, 0);
+ CMP(_CMP_NEQ_OS, !isunordered(s1[i], s2[i]) && s1[i] != s2[i]);
+ CMP(_CMP_GE_OQ, !isunordered(s1[i], s2[i]) && s1[i] >= s2[i]);
+ CMP(_CMP_GT_OQ, !isunordered(s1[i], s2[i]) && s1[i] > s2[i]);
+ CMP(_CMP_TRUE_US, 1)
+}
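
The CMP invocations walk the full 32-entry _CMP_ predicate table. The suffixes decode as ordered/unordered plus quiet/signaling, and only the first letter changes the mask bit, which is why, for example, _CMP_EQ_OQ and _CMP_EQ_OS share one reference expression above. In comment form:

    /* _CMP_<REL>_<O|U><Q|S>:
         O  ordered    -- false if either operand is NaN
         U  unordered  -- true  if either operand is NaN
         Q  quiet      -- quiet NaNs raise no invalid-operation exception
         S  signaling  -- any NaN raises the invalid-operation exception
       Q vs. S affects only the FP exception behaviour, never the result
       bit, so the reference expressions pair up across the two halves
       of the table.  */
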
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
new file mode 100644
index 00000000000..b90be8c726a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*\[^\}\]%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*\[^\}\]%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_ps_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+ m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
new file mode 100644
index 00000000000..4c458d2d51d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_ps(s1); \
+ source2.x = _mm512_loadu_ps(s2); \
+ dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+ UNION_TYPE (AVX512F_LEN,) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+ float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464,
+ 231.23311, 5674.455, 111.111111, 23241.152,
+ 123.14811, 1245.124, 244.151353, 53454.141,
+ 926.16717, 3733.261, 643.161644, 23514.633};
+ float s2[16] = {41124.234, 6678.346, 8653.65635, 856.43576,
+ 231.23311, 4646.123, 111.111111, 124.12455,
+ 123.14811, 1245.124, 244.151353, 53454.141,
+ 2134.3343, 6678.346, 453.345635, 54646.464};
+
+ CMP(_CMP_EQ_OQ, !isunordered(s1[i], s2[i]) && s1[i] == s2[i]);
+ CMP(_CMP_LT_OS, !isunordered(s1[i], s2[i]) && s1[i] < s2[i]);
+ CMP(_CMP_LE_OS, !isunordered(s1[i], s2[i]) && s1[i] <= s2[i]);
+ CMP(_CMP_UNORD_Q, isunordered(s1[i], s2[i]));
+ CMP(_CMP_NEQ_UQ, isunordered(s1[i], s2[i]) || s1[i] != s2[i]);
+ CMP(_CMP_NLT_US, isunordered(s1[i], s2[i]) || s1[i] >= s2[i]);
+ CMP(_CMP_NLE_US, isunordered(s1[i], s2[i]) || s1[i] > s2[i]);
+ CMP(_CMP_ORD_Q, !isunordered(s1[i], s2[i]));
+
+ CMP(_CMP_EQ_UQ, isunordered(s1[i], s2[i]) || s1[i] == s2[i]);
+ CMP(_CMP_NGE_US, isunordered(s1[i], s2[i]) || s1[i] < s2[i]);
+ CMP(_CMP_NGT_US, isunordered(s1[i], s2[i]) || s1[i] <= s2[i]);
+
+ CMP(_CMP_FALSE_OQ, 0);
+ CMP(_CMP_NEQ_OQ, !isunordered(s1[i], s2[i]) && s1[i] != s2[i]);
+ CMP(_CMP_GE_OS, !isunordered(s1[i], s2[i]) && s1[i] >= s2[i]);
+ CMP(_CMP_GT_OS, !isunordered(s1[i], s2[i]) && s1[i] > s2[i]);
+ CMP(_CMP_TRUE_UQ, 1);
+
+ CMP(_CMP_EQ_OS, !isunordered(s1[i], s2[i]) && s1[i] == s2[i]);
+ CMP(_CMP_LT_OQ, !isunordered(s1[i], s2[i]) && s1[i] < s2[i]);
+ CMP(_CMP_LE_OQ, !isunordered(s1[i], s2[i]) && s1[i] <= s2[i]);
+ CMP(_CMP_UNORD_S, isunordered(s1[i], s2[i]));
+ CMP(_CMP_NEQ_US, isunordered(s1[i], s2[i]) || s1[i] != s2[i]);
+ CMP(_CMP_NLT_UQ, isunordered(s1[i], s2[i]) || s1[i] >= s2[i]);
+ CMP(_CMP_NLE_UQ, isunordered(s1[i], s2[i]) || s1[i] > s2[i]);
+ CMP(_CMP_ORD_S, !isunordered(s1[i], s2[i]));
+ CMP(_CMP_EQ_US, isunordered(s1[i], s2[i]) || s1[i] == s2[i]);
+ CMP(_CMP_NGE_UQ, isunordered(s1[i], s2[i]) || s1[i] < s2[i]);
+ CMP(_CMP_NGT_UQ, isunordered(s1[i], s2[i]) || s1[i] <= s2[i]);
+ CMP(_CMP_FALSE_OS, 0);
+ CMP(_CMP_NEQ_OS, !isunordered(s1[i], s2[i]) && s1[i] != s2[i]);
+ CMP(_CMP_GE_OQ, !isunordered(s1[i], s2[i]) && s1[i] >= s2[i]);
+ CMP(_CMP_GT_OQ, !isunordered(s1[i], s2[i]) && s1[i] > s2[i]);
+ CMP(_CMP_TRUE_US, 1)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-1.c
new file mode 100644
index 00000000000..7f92fbea386
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vcmpsd\[ \\t\]+\[^\n\]*\[^\}\]%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpsd\[ \\t\]+\[^\n\]*\[^\}\]%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm_cmp_sd_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm_mask_cmp_sd_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm_cmp_round_sd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+ m = _mm_mask_cmp_round_sd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-2.c
new file mode 100644
index 00000000000..3e4729e4aac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpsd-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target c99_runtime } */
+/* { dg-options "-O2 -mavx512f -std=c99" } */
+
+#include "avx512f-check.h"
+#include <math.h>
+
+double s1[2] = {2134.3343, 6678.346};
+double s2[2] = {1485.1288, 6678.346};
+
+__mmask8 dst_ref;
+
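+/* The scalar compare writes only bit 0 of the result mask, so the
+   reference needs just a single relation result.  */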
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ dst_ref = ((int) rel) | dst_ref; \
+ source1 = _mm_loadu_pd(s1); \
+ source2 = _mm_loadu_pd(s2); \
+ dst = _mm_cmp_sd_mask(source1, source2, imm); \
+ dst2 = _mm_mask_cmp_sd_mask(mask, source1, source2, imm);\
+ if (dst_ref != dst) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+
+static void
+avx512f_test ()
+{
+ __m128d source1, source2;
+ __mmask8 dst, dst2, mask;
+ mask = 1;
+
+ CMP(_CMP_EQ_OQ, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
+ CMP(_CMP_LT_OS, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
+ CMP(_CMP_LE_OS, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
+ CMP(_CMP_UNORD_Q, isunordered(s1[0], s2[0]));
+ CMP(_CMP_NEQ_UQ, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
+ CMP(_CMP_NLT_US, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
+ CMP(_CMP_NLE_US, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
+ CMP(_CMP_ORD_Q, !isunordered(s1[0], s2[0]));
+
+ CMP(_CMP_EQ_UQ, isunordered(s1[0], s2[0]) || s1[0] == s2[0]);
+ CMP(_CMP_NGE_US, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
+ CMP(_CMP_NGT_US, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
+
+ CMP(_CMP_FALSE_OQ, 0);
+ CMP(_CMP_NEQ_OQ, !isunordered(s1[0], s2[0]) && s1[0] != s2[0]);
+ CMP(_CMP_GE_OS, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
+ CMP(_CMP_GT_OS, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
+ CMP(_CMP_TRUE_UQ, 1);
+
+ CMP(_CMP_EQ_OS, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
+ CMP(_CMP_LT_OQ, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
+ CMP(_CMP_LE_OQ, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
+ CMP(_CMP_UNORD_S, isunordered(s1[0], s2[0]));
+ CMP(_CMP_NEQ_US, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
+ CMP(_CMP_NLT_UQ, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
+ CMP(_CMP_NLE_UQ, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
+ CMP(_CMP_ORD_S, !isunordered(s1[0], s2[0]));
+ CMP(_CMP_EQ_US, isunordered(s1[0], s2[0]) || s1[0] == s2[0]);
+ CMP(_CMP_NGE_UQ, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
+ CMP(_CMP_NGT_UQ, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
+ CMP(_CMP_FALSE_OS, 0);
+ CMP(_CMP_NEQ_OS, !isunordered(s1[0], s2[0]) && s1[0] != s2[0]);
+ CMP(_CMP_GE_OQ, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
+ CMP(_CMP_GT_OQ, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
+ CMP(_CMP_TRUE_US, 1)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-1.c
new file mode 100644
index 00000000000..9f370cb0e1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vcmpss\[ \\t\]+\[^\n\]*\[^\}\]%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpss\[ \\t\]+\[^\n\]*\[^\}\]%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm_cmp_ss_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm_mask_cmp_ss_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm_cmp_round_ss_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+ m = _mm_mask_cmp_round_ss_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-2.c
new file mode 100644
index 00000000000..7343cb05cdb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpss-2.c
@@ -0,0 +1,68 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target c99_runtime } */
+/* { dg-options "-O2 -mavx512f -std=c99" } */
+
+#include "avx512f-check.h"
+#include <math.h>
+
+float s1[4] = {2134.3343, 6678.346, 453.345635, 54646.464};
+float s2[4] = {1485.1288, 6678.346, 8653.65635, 856.43576};
+
+__mmask8 dst_ref;
+
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ dst_ref = ((int) rel) | dst_ref; \
+ source1 = _mm_loadu_ps(s1); \
+ source2 = _mm_loadu_ps(s2); \
+ dst = _mm_cmp_ss_mask(source1, source2, imm); \
+ dst2 = _mm_mask_cmp_ss_mask(mask, source1, source2, imm);\
+ if (dst_ref != dst) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+
+static void
+avx512f_test ()
+{
+ __m128 source1, source2;
+ __mmask8 dst, dst2, mask;
+
+ mask = 1;
+
+ CMP(_CMP_EQ_OQ, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
+ CMP(_CMP_LT_OS, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
+ CMP(_CMP_LE_OS, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
+ CMP(_CMP_UNORD_Q, isunordered(s1[0], s2[0]));
+ CMP(_CMP_NEQ_UQ, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
+ CMP(_CMP_NLT_US, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
+ CMP(_CMP_NLE_US, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
+ CMP(_CMP_ORD_Q, !isunordered(s1[0], s2[0]));
+
+ CMP(_CMP_EQ_UQ, isunordered(s1[0], s2[0]) || s1[0] == s2[0]);
+ CMP(_CMP_NGE_US, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
+ CMP(_CMP_NGT_US, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
+
+ CMP(_CMP_FALSE_OQ, 0);
+ CMP(_CMP_NEQ_OQ, !isunordered(s1[0], s2[0]) && s1[0] != s2[0]);
+ CMP(_CMP_GE_OS, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
+ CMP(_CMP_GT_OS, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
+ CMP(_CMP_TRUE_UQ, 1);
+
+ CMP(_CMP_EQ_OS, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
+ CMP(_CMP_LT_OQ, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
+ CMP(_CMP_LE_OQ, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
+ CMP(_CMP_UNORD_S, isunordered(s1[0], s2[0]));
+ CMP(_CMP_NEQ_US, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
+ CMP(_CMP_NLT_UQ, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
+ CMP(_CMP_NLE_UQ, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
+ CMP(_CMP_ORD_S, !isunordered(s1[0], s2[0]));
+ CMP(_CMP_EQ_US, isunordered(s1[0], s2[0]) || s1[0] == s2[0]);
+ CMP(_CMP_NGE_UQ, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
+ CMP(_CMP_NGT_UQ, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
+ CMP(_CMP_FALSE_OS, 0);
+ CMP(_CMP_NEQ_OS, !isunordered(s1[0], s2[0]) && s1[0] != s2[0]);
+ CMP(_CMP_GE_OQ, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
+ CMP(_CMP_GT_OQ, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
+ CMP(_CMP_TRUE_US, 1)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-1.c
new file mode 100644
index 00000000000..7b5aff4e34b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vcomisd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm" } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile int res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm_comi_round_sd (x, x, _CMP_LT_OS, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-1.c
new file mode 100644
index 00000000000..bc504190487
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcomiss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vcomiss\[ \\t\]+\[^{}\n\]*%xmm" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile int res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm_comi_round_ss (x, x, _CMP_LT_OS, _MM_FROUND_NO_EXC);
+}
+
+void extern
+avx512f_test_2 (void)
+{
+ res = _mm_comi_round_ss (x, x, _CMP_LT_OS, _MM_FROUND_CUR_DIRECTION);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-1.c
new file mode 100644
index 00000000000..3f2cdff9c14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_compress_pd (x, m, x);
+ x = _mm512_maskz_compress_pd (m, x);
+
+ _mm512_mask_compressstoreu_pd (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-2.c
new file mode 100644
index 00000000000..959c77561be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcompresspd-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define MASK ((1 << SIZE) - 1)
+#include <x86intrin.h>
+
+static void
+CALC (double *s, double *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[k++] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2;
+ double res3[SIZE];
+ MASK_TYPE compressed_mask, mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, mask_bit_count, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345 * (i + 200) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_compress_pd) (res1.x, mask, s.x);
+ res2.x = INTRINSIC (_maskz_compress_pd) (mask, s.x);
+ INTRINSIC (_mask_compressstoreu_pd) (res3, mask, s.x);
+
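+ /* Compression packs the selected elements into the low positions, so
+ only the popcount (mask) lowest result elements are defined.  */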
+ mask_bit_count = __popcntd (mask & MASK);
+ compressed_mask = (1 << mask_bit_count) - 1;
+ CALC (s.a, res_ref, mask);
+
+ MASK_MERGE (d) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, compressed_mask, SIZE);
+ if (checkVd (res3, res_ref, SIZE))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-1.c
new file mode 100644
index 00000000000..ab715c6fc09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_compress_ps (x, m, x);
+ x = _mm512_maskz_compress_ps (m, x);
+
+ _mm512_mask_compressstoreu_ps (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-2.c
new file mode 100644
index 00000000000..bd6e081fd1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcompressps-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define MASK ((1 << SIZE) - 1)
+#include <x86intrin.h>
+
+static void
+CALC (float *s, float *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[k++] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s, res1, res2;
+ float res3[SIZE];
+ MASK_TYPE compressed_mask, mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, mask_bit_count, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345 * (i + 200) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_compress_ps) (res1.x, mask, s.x);
+ res2.x = INTRINSIC (_maskz_compress_ps) (mask, s.x);
+ INTRINSIC (_mask_compressstoreu_ps) (res3, mask, s.x);
+
+ mask_bit_count = __popcntd (mask & MASK);
+ compressed_mask = (1 << mask_bit_count) - 1;
+ CALC (s.a, res_ref, mask);
+
+ MASK_MERGE () (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, compressed_mask, SIZE);
+ if (checkVf (res3, res_ref, SIZE))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-1.c
new file mode 100644
index 00000000000..d2c616b08b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512d res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi32_pd (s);
+ res = _mm512_mask_cvtepi32_pd (res, m, s);
+ res = _mm512_maskz_cvtepi32_pd (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-2.c
new file mode 100644
index 00000000000..9548b71deb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2pd-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (int *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (double) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) s;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[DST_SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123456 * (i + 2000) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtepi32_pd) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi32_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi32_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-1.c
new file mode 100644
index 00000000000..58e727d2814
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512 res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi32_ps (s);
+ res = _mm512_mask_cvtepi32_ps (res, m, s);
+ res = _mm512_maskz_cvtepi32_ps (m, s);
+ res = _mm512_cvt_roundepi32_ps (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundepi32_ps (res, m, s, _MM_FROUND_TO_POS_INF);
+ res = _mm512_maskz_cvt_roundepi32_ps (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-2.c
new file mode 100644
index 00000000000..b23ba70af1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtdq2ps-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (float) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi32_ps) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi32_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi32_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-1.c
new file mode 100644
index 00000000000..964878f92ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtpd_epi32 (s);
+ res = _mm512_mask_cvtpd_epi32 (res, m, s);
+ res = _mm512_maskz_cvtpd_epi32 (m, s);
+ res = _mm512_cvt_roundpd_epi32 (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundpd_epi32 (res, m, s, _MM_FROUND_TO_POS_INF);
+ res = _mm512_maskz_cvt_roundpd_epi32 (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-2.c
new file mode 100644
index 00000000000..052b51f76a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2dq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, int *r)
+{
+ int i;
+
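+ /* vcvtpd2dq rounds to nearest under the default MXCSR mode; adding
+ (or subtracting) 0.5 before truncating matches it away from halfway
+ cases, which these inputs avoid.  */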
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ r[i] = (s[i] >= 0) ? (int) (s[i] + 0.5)
+ : (int) (s[i] - 0.5);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtpd_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtpd_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtpd_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-1.c
new file mode 100644
index 00000000000..457bb07700f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256 y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_cvtpd_ps (x);
+ y = _mm512_mask_cvtpd_ps (y, 4, x);
+ y = _mm512_maskz_cvtpd_ps (6, x);
+ y = _mm512_cvt_roundpd_ps (x, _MM_FROUND_TO_NEAREST_INT);
+ y = _mm512_mask_cvt_roundpd_ps (y, 4, x, _MM_FROUND_TO_NEG_INF);
+ y = _mm512_maskz_cvt_roundpd_ps (6, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c
new file mode 100644
index 00000000000..805b6b5fcf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *e, UNION_TYPE (AVX512F_LEN, d) s1)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ e[i] = (float) s1.a[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1;
+ UNION_TYPE (AVX512F_LEN_HALF,) u1, u2, u3;
+ MASK_TYPE mask = MASK_VALUE;
+ float e[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 0.12 * (i + 37.09);
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_cvtpd_ps) (s1.x);
+ u2.x = INTRINSIC (_mask_cvtpd_ps) (u2.x, mask, s1.x);
+ u3.x = INTRINSIC (_maskz_cvtpd_ps) (mask, s1.x);
+
+ CALC (e, s1);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (u1, e))
+ abort ();
+
+ MASK_MERGE ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (u2, e))
+ abort ();
+
+ MASK_ZERO ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-1.c
new file mode 100644
index 00000000000..28bfb17aa99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtpd_epu32 (s);
+ res = _mm512_mask_cvtpd_epu32 (res, m, s);
+ res = _mm512_maskz_cvtpd_epu32 (m, s);
+ res = _mm512_cvt_roundpd_epu32 (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundpd_epu32 (res, m, s, _MM_FROUND_TO_POS_INF);
+ res = _mm512_maskz_cvt_roundpd_epu32 (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-2.c
new file mode 100644
index 00000000000..895ad16bf3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2udq-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, unsigned *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (unsigned) (s[i] + 0.5);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[DST_SIZE] = { 0 };
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtpd_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtpd_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtpd_epu32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-1.c
new file mode 100644
index 00000000000..b22a950dd66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m512 y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_cvtph_ps (x);
+ y = _mm512_mask_cvtph_ps (y, 4, x);
+ y = _mm512_maskz_cvtph_ps (6, x);
+ y = _mm512_cvt_roundph_ps (x, _MM_FROUND_NO_EXC);
+ y = _mm512_mask_cvt_roundph_ps (y, 4, x, _MM_FROUND_NO_EXC);
+ y = _mm512_maskz_cvt_roundph_ps (6, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c
new file mode 100644
index 00000000000..0cc7d628d44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) val;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float exp[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ exp[0] = 1;
+ exp[1] = 2;
+ exp[2] = 4;
+ exp[3] = 8;
+#if AVX512F_LEN > 128
+ exp[4] = -1;
+ exp[5] = -2;
+ exp[6] = -4;
+ exp[7] = -8;
+#endif
+#if AVX512F_LEN > 256
+ exp[8] = 1;
+ exp[9] = 2;
+ exp[10] = 4;
+ exp[11] = 8;
+ exp[12] = -1;
+ exp[13] = -2;
+ exp[14] = -4;
+ exp[15] = -8;
+#endif
+
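+ /* Inputs are IEEE 754 binary16 encodings: 0x3c00, 0x4000, 0x4400 and
+ 0x4800 are 1.0, 2.0, 4.0 and 8.0; setting the sign bit (0xbc00 etc.)
+ negates them.  */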
+ val.a[0] = 0x3c00;
+ val.a[1] = 0x4000;
+ val.a[2] = 0x4400;
+ val.a[3] = 0x4800;
+#if AVX512F_LEN > 128
+ val.a[4] = 0xbc00;
+ val.a[5] = 0xc000;
+ val.a[6] = 0xc400;
+ val.a[7] = 0xc800;
+#endif
+#if AVX512F_LEN > 256
+ val.a[8] = 0x3c00;
+ val.a[9] = 0x4000;
+ val.a[10] = 0x4400;
+ val.a[11] = 0x4800;
+ val.a[12] = 0xbc00;
+ val.a[13] = 0xc000;
+ val.a[14] = 0xc400;
+ val.a[15] = 0xc800;
+#endif
+
+ res1.x = _mm512_cvtph_ps (val.x);
+ res2.x = _mm512_mask_cvtph_ps (res2.x, mask, val.x);
+ res3.x = _mm512_maskz_cvtph_ps (mask, val.x);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, exp))
+ abort ();
+
+ MASK_MERGE () (exp, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, exp))
+ abort ();
+
+ MASK_ZERO () (exp, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, exp))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-1.c
new file mode 100644
index 00000000000..2db36e9c135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtps_epi32 (s);
+ res = _mm512_mask_cvtps_epi32 (res, m, s);
+ res = _mm512_maskz_cvtps_epi32 (m, s);
+ res = _mm512_cvt_roundps_epi32 (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundps_epi32 (res, m, s, _MM_FROUND_TO_POS_INF);
+ res = _mm512_maskz_cvt_roundps_epi32 (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-2.c
new file mode 100644
index 00000000000..03d0452aa1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2dq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s[i] >= 0) ? (int) (s[i] + 0.5) : (int) (s[i] - 0.5);
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_cvtps_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_cvtps_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtps_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-1.c
new file mode 100644
index 00000000000..c6fc4733720
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 s;
+volatile __m512d res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtps_pd (s);
+ res = _mm512_mask_cvtps_pd (res, m, s);
+ res = _mm512_maskz_cvtps_pd (m, s);
+ res = _mm512_cvt_roundps_pd (s, _MM_FROUND_NO_EXC);
+ res = _mm512_mask_cvt_roundps_pd (res, m, s, _MM_FROUND_NO_EXC);
+ res = _mm512_maskz_cvt_roundps_pd (m, s, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-2.c
new file mode 100644
index 00000000000..e8e92fee6e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2pd-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (double) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, ) s;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[DST_SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtps_pd) (s.x);
+ res2.x = INTRINSIC (_mask_cvtps_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtps_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-1.c
new file mode 100644
index 00000000000..daf701484a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m256i y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_cvtps_ph (x, 0);
+ y = _mm512_maskz_cvtps_ph (4, x, 0);
+ y = _mm512_mask_cvtps_ph (y, 2, x, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c
new file mode 100644
index 00000000000..91ea46a8913
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) val;
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short exp[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ val.a[0] = 1;
+ val.a[1] = 2;
+ val.a[2] = 4;
+ val.a[3] = 8;
+#if AVX512F_LEN > 128
+ val.a[4] = -1;
+ val.a[5] = -2;
+ val.a[6] = -4;
+ val.a[7] = -8;
+#endif
+#if AVX512F_LEN > 256
+ val.a[8] = 1;
+ val.a[9] = 2;
+ val.a[10] = 4;
+ val.a[11] = 8;
+ val.a[12] = -1;
+ val.a[13] = -2;
+ val.a[14] = -4;
+ val.a[15] = -8;
+#endif
+
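+ /* Expected IEEE 754 binary16 encodings of the float inputs above.  */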
+ exp[0] = 0x3c00;
+ exp[1] = 0x4000;
+ exp[2] = 0x4400;
+ exp[3] = 0x4800;
+#if AVX512F_LEN > 128
+ exp[4] = 0xbc00;
+ exp[5] = 0xc000;
+ exp[6] = 0xc400;
+ exp[7] = 0xc800;
+#endif
+#if AVX512F_LEN > 256
+ exp[8] = 0x3c00;
+ exp[9] = 0x4000;
+ exp[10] = 0x4400;
+ exp[11] = 0x4800;
+ exp[12] = 0xbc00;
+ exp[13] = 0xc000;
+ exp[14] = 0xc400;
+ exp[15] = 0xc800;
+#endif
+
+ res1.x = _mm512_cvtps_ph (val.x, 0);
+ res2.x = _mm512_mask_cvtps_ph (res2.x, mask, val.x, 0);
+ res3.x = _mm512_maskz_cvtps_ph (mask, val.x, 0);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, exp))
+ abort ();
+
+ MASK_MERGE (i_w) (exp, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, exp))
+ abort ();
+
+ MASK_ZERO (i_w) (exp, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, exp))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-1.c
new file mode 100644
index 00000000000..dfc08ab10d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtps_epu32 (s);
+ res = _mm512_mask_cvtps_epu32 (res, m, s);
+ res = _mm512_maskz_cvtps_epu32 (m, s);
+ res = _mm512_cvt_roundps_epu32 (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundps_epu32 (res, m, s, _MM_FROUND_TO_NEG_INF);
+ res = _mm512_maskz_cvt_roundps_epu32 (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-2.c
new file mode 100644
index 00000000000..16150aa8302
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2udq-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (unsigned) (s[i] + 0.5);
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1.5 + 34.67 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtps_epu32) (src.x);
+ res2.x = INTRINSIC (_mask_cvtps_epu32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtps_epu32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si-1.c
new file mode 100644
index 00000000000..84a10da4b6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtsd2si\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvt_roundsd_i32 (x, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c
new file mode 100644
index 00000000000..ca2ff58b3b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtsd2siq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvt_roundsd_i64 (x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c
new file mode 100644
index 00000000000..a5164e75077
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 s1, r;
+volatile __m128d s2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ r = _mm_mask_cvtsd_ss (r, m, s1, s2);
+ r = _mm_maskz_cvtsd_ss (m, s1, s2);
+ r = _mm_cvt_roundsd_ss (s1, s2, _MM_FROUND_TO_NEAREST_INT);
+ r = _mm_mask_cvt_roundsd_ss (r, m, s1, s2, _MM_FROUND_TO_NEG_INF);
+ r = _mm_maskz_cvt_roundsd_ss (m, s1, s2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-2.c
new file mode 100644
index 00000000000..fee5df9d435
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2ss-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
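+/* Reference: element 0 is the converted double, the upper elements pass
+   through unchanged from the first source operand.  */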
+static void
+compute_vcvtsd2ss (float *s1, double *s2, float *r)
+{
+ r[0] = (float) s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, res1, res2;
+ union128d s2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_pd (4560.987, -2301.987);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_cvtsd_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_cvtsd_ss (mask, s1.x, s2.x);
+
+ compute_vcvtsd2ss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-1.c
new file mode 100644
index 00000000000..c5e80aed47d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtsd2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2usi\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvtsd_u32 (x);
+ y = _mm_cvt_roundsd_u32 (x, _MM_FROUND_TO_NEG_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-2.c
new file mode 100644
index 00000000000..e53012446e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi-2.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union128d s1;
+ unsigned int d;
+ unsigned int e;
+
+ s1.x = _mm_set_pd (24.43, 68.346);
+ d = _mm_cvtsd_u32 (s1.x);
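+ /* Under the default round-to-nearest mode, adding 0.5 and truncating
+ gives the same result for this non-halfway input.  */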
+ e = (unsigned int)(s1.a[0] + 0.5);
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-1.c
new file mode 100644
index 00000000000..9edecd31d8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtsd2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtsd2usi\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvtsd_u64 (x);
+ y = _mm_cvt_roundsd_u64 (x, _MM_FROUND_TO_POS_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-2.c
new file mode 100644
index 00000000000..92843d9e361
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2usi64-2.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union128d s1;
+ unsigned long long d;
+ unsigned long long e;
+
+ s1.x = _mm_set_pd (24.43, 68.346);
+ d = _mm_cvtsd_u64 (s1.x);
+ e = (unsigned long long)(s1.a[0] + 0.5);
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2sd64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2sd64-1.c
new file mode 100644
index 00000000000..2d49094131e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2sd64-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtsi2sdq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile long long n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvt_roundi64_sd (x, n, _MM_FROUND_TO_POS_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c
new file mode 100644
index 00000000000..9768a570169
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtsi2ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile int n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvt_roundi32_ss (x, n, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss64-1.c
new file mode 100644
index 00000000000..c9d2daf363f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss64-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtsi2ssq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile long long n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvt_roundi64_ss (x, n, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c
new file mode 100644
index 00000000000..483486f621e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d s1, r;
+volatile __m128 s2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ r = _mm_mask_cvtss_sd (r, m, s1, s2);
+ r = _mm_maskz_cvtss_sd (m, s1, s2);
+ r = _mm_cvt_roundss_sd (s1, s2, _MM_FROUND_NO_EXC);
+ r = _mm_mask_cvt_roundss_sd (r, m, s1, s2, _MM_FROUND_NO_EXC);
+ r = _mm_maskz_cvt_roundss_sd (m, s1, s2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-2.c
new file mode 100644
index 00000000000..aee4b01d6d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2sd-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vcvtss2sd (double *s1, float *s2, double *r)
+{
+ r[0] = (double) s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, res1, res2;
+ union128 s2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ s1.x = _mm_set_pd (4560.987, -2301.987);
+ s2.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_cvtss_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_cvtss_sd (mask, s1.x, s2.x);
+
+ compute_vcvtss2sd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si-1.c
new file mode 100644
index 00000000000..1e52fea6396
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtss2si\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvt_roundss_i32 (x, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c
new file mode 100644
index 00000000000..bc3e301e231
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtss2siq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvt_roundss_i64 (x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-1.c
new file mode 100644
index 00000000000..70fcfe82c39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtss2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtss2usi\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvtss_u32 (x);
+ y = _mm_cvt_roundss_u32 (x, _MM_FROUND_TO_NEG_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-2.c
new file mode 100644
index 00000000000..bdfab830956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi-2.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union128 s1;
+ unsigned int d;
+ unsigned int e;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 35.7765, 34508.51);
+ d = _mm_cvtss_u32 (s1.x);
+ e = (unsigned int)(s1.a[0] + 0.5);
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-1.c
new file mode 100644
index 00000000000..0dd46cd9347
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvtss2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtss2usi\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvtss_u64 (x);
+ y = _mm_cvt_roundss_u64 (x, _MM_FROUND_TO_POS_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-2.c
new file mode 100644
index 00000000000..d19da31719c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2usi64-2.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union128 s1;
+ unsigned long long d;
+ unsigned long long e;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 12.34, 80.67);
+ d = _mm_cvtss_u64 (s1.x);
+ e = (unsigned long long)(s1.a[0] + 0.5);
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-1.c
new file mode 100644
index 00000000000..5fad1e354c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvttpd_epi32 (s);
+ res = _mm512_mask_cvttpd_epi32 (res, m, s);
+ res = _mm512_maskz_cvttpd_epi32 (m, s);
+ res = _mm512_cvtt_roundpd_epi32 (s, _MM_FROUND_NO_EXC);
+ res = _mm512_mask_cvtt_roundpd_epi32 (res, m, s, _MM_FROUND_NO_EXC);
+ res = _mm512_maskz_cvtt_roundpd_epi32 (m, s, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-2.c
new file mode 100644
index 00000000000..775665751c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2dq-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
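+/* The truncating pd->dq conversion narrows 64-bit lanes to 32-bit,
+   so the destination vector is half the source width.  */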
+
+static void
+CALC (double *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ r[i] = (int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvttpd_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttpd_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttpd_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-1.c
new file mode 100644
index 00000000000..36f2e40c59b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvttpd_epu32 (s);
+ res = _mm512_mask_cvttpd_epu32 (res, m, s);
+ res = _mm512_maskz_cvttpd_epu32 (m, s);
+ res = _mm512_cvtt_roundpd_epu32 (s, _MM_FROUND_NO_EXC);
+ res = _mm512_mask_cvtt_roundpd_epu32 (res, m, s, _MM_FROUND_NO_EXC);
+ res = _mm512_maskz_cvtt_roundpd_epu32 (m, s, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-2.c
new file mode 100644
index 00000000000..2a8bf204b67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttpd2udq-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, unsigned *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (unsigned) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[DST_SIZE] = { 0 };
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvttpd_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttpd_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttpd_epu32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-1.c
new file mode 100644
index 00000000000..a156dbee9f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvttps_epi32 (s);
+ res = _mm512_mask_cvttps_epi32 (res, m, s);
+ res = _mm512_maskz_cvttps_epi32 (m, s);
+ res = _mm512_cvtt_roundps_epi32 (s, _MM_FROUND_NO_EXC);
+ res = _mm512_mask_cvtt_roundps_epi32 (res, m, s, _MM_FROUND_NO_EXC);
+ res = _mm512_maskz_cvtt_roundps_epi32 (m, s, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-2.c
new file mode 100644
index 00000000000..33f62e6afce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2dq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (int) s[i];
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_cvttps_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_cvttps_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvttps_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-1.c
new file mode 100644
index 00000000000..ffbfdfca328
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvttps_epu32 (s);
+ res = _mm512_mask_cvttps_epu32 (res, m, s);
+ res = _mm512_maskz_cvttps_epu32 (m, s);
+ res = _mm512_cvtt_roundps_epu32 (s, _MM_FROUND_NO_EXC);
+ res = _mm512_mask_cvtt_roundps_epu32 (res, m, s, _MM_FROUND_NO_EXC);
+ res = _mm512_maskz_cvtt_roundps_epu32 (m, s, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-2.c
new file mode 100644
index 00000000000..40d63c7c46d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttps2udq-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (unsigned) s[i];
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1.5 + 34.67 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvttps_epu32) (src.x);
+ res2.x = INTRINSIC (_mask_cvttps_epu32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvttps_epu32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-1.c
new file mode 100644
index 00000000000..e813a24a0a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttsd2si\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2si\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile int y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttsd_i32 (x);
+ y = _mm_cvtt_roundsd_i32 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-2.c
new file mode 100644
index 00000000000..a447a873421
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si-2.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static int
+__attribute__ ((noinline, unused))
+test (__m128d x)
+{
+ return _mm_cvttsd_i32 (x);
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1;
+ int res, res_ref;
+
+ s1.x = _mm_set_pd (123.321, 456.987);
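+ /* _mm_set_pd lists elements high to low, so s1.a[0] is 456.987;
+ vcvttsd2si truncates toward zero.  */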
+ res = test (s1.x);
+ res_ref = (int) s1.a[0];
+
+ if (res != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c
new file mode 100644
index 00000000000..a3b870c1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttsd2siq\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2siq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttsd_i64 (x);
+ y = _mm_cvtt_roundsd_i64 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-2.c
new file mode 100644
index 00000000000..7b759c1fa9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-2.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static long long
+__attribute__ ((noinline, unused))
+test (__m128d x)
+{
+ return _mm_cvttsd_i64 (x);
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1;
+ long long res, res_ref;
+
+ s1.x = _mm_set_pd (123.321, 456.987);
+ res = test (s1.x);
+ res_ref = (long long) s1.a[0];
+
+ if (res != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-1.c
new file mode 100644
index 00000000000..3a88517a738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttsd2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usi\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttsd_u32 (x);
+ y = _mm_cvtt_roundsd_u32 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-2.c
new file mode 100644
index 00000000000..00f7eb6e5d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static unsigned int
+__attribute__((noinline, unused))
+test (union128d s1)
+{
+ return _mm_cvttsd_u32 (s1.x);
+}
+
+void static
+avx512f_test (void)
+{
+ union128d s1;
+ unsigned int d;
+ unsigned int e;
+
+ s1.x = _mm_set_pd (24.43, 68.346);
+ d = test (s1);
+ e = (unsigned int)s1.a[0];
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-1.c
new file mode 100644
index 00000000000..87bbcb7be6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttsd2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usi\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttsd_u64 (x);
+ y = _mm_cvtt_roundsd_u64 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-2.c
new file mode 100644
index 00000000000..4aa45ef8264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2usi64-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static unsigned long long
+__attribute__((noinline, unused))
+test (union128d s1)
+{
+ return _mm_cvttsd_u64 (s1.x);
+}
+
+void static
+avx512f_test (void)
+{
+ union128d s1;
+ unsigned long long d;
+ unsigned long long e;
+
+ s1.x = _mm_set_pd (24.43, 68.346);
+ d = test (s1);
+ e = (unsigned long long)s1.a[0];
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-1.c
new file mode 100644
index 00000000000..7669a1729a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttss2si\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttss2si\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile int y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttss_i32 (x);
+ y = _mm_cvtt_roundss_i32 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-2.c
new file mode 100644
index 00000000000..2aa62c07140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si-2.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static int
+__attribute__ ((noinline, unused))
+test (__m128 x)
+{
+ return _mm_cvttss_i32 (x);
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1;
+ int res, res_ref;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);
+ res = test (s1.x);
+ res_ref = (int) s1.a[0];
+
+ if (res != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c
new file mode 100644
index 00000000000..4888d6d1d9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttss2siq\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttss2siq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttss_i64 (x);
+ y = _mm_cvtt_roundss_i64 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-2.c
new file mode 100644
index 00000000000..cf33b997a8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-2.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static long long
+__attribute__ ((noinline, unused))
+test (__m128 x)
+{
+ return _mm_cvttss_i64 (x);
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1;
+ long long res, res_ref;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);
+ res = test (s1.x);
+ res_ref = (long long) s1.a[0];
+
+ if (res != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-1.c
new file mode 100644
index 00000000000..b270276352c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttss2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttss2usi\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttss_u32 (x);
+ y = _mm_cvtt_roundss_u32 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-2.c
new file mode 100644
index 00000000000..4d19104776b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static unsigned int
+__attribute__((noinline, unused))
+test (union128 s1)
+{
+ return _mm_cvttss_u32 (s1.x);
+}
+
+void static
+avx512f_test (void)
+{
+ union128 s1;
+ unsigned int d;
+ unsigned int e;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 45.12, 90.97);
+ d = test (s1);
+ e = (unsigned int)s1.a[0];
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-1.c
new file mode 100644
index 00000000000..7c3b473c3b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vcvttss2usi\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttss2usi\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned long long y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm_cvttss_u64 (x);
+ y = _mm_cvtt_roundss_u64 (x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-2.c
new file mode 100644
index 00000000000..85f55d6cd7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2usi64-2.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static unsigned long long
+__attribute__((noinline, unused))
+test (union128 s1)
+{
+ return _mm_cvttss_u64 (s1.x);
+}
+
+void static
+avx512f_test (void)
+{
+ union128 s1;
+ unsigned long long d;
+ unsigned long long e;
+
+ s1.x = _mm_set_ps (24.43, 68.346, 10.756, 89.145);
+ d = test (s1);
+ e = (unsigned long long)s1.a[0];
+
+ if (e != d)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-1.c
new file mode 100644
index 00000000000..933e785e866
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512d res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu32_pd (s);
+ res = _mm512_mask_cvtepu32_pd (res, m, s);
+ res = _mm512_maskz_cvtepu32_pd (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-2.c
new file mode 100644
index 00000000000..e96a4847c74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2pd-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#include "avx512f-mask-type.h"
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (unsigned *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (double) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) s;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 123456 * (i + 2000);
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtepu32_pd) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu32_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu32_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
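+ /* Merge-masking keeps DEFAULT_VALUE in lanes whose mask bit is
+ clear; zero-masking clears those lanes instead.  */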
+ MASK_MERGE (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-1.c
new file mode 100644
index 00000000000..a42a58890a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m512 res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu32_ps (s);
+ res = _mm512_mask_cvtepu32_ps (res, m, s);
+ res = _mm512_maskz_cvtepu32_ps (m, s);
+ res = _mm512_cvt_roundepu32_ps (s, _MM_FROUND_TO_NEAREST_INT);
+ res = _mm512_mask_cvt_roundepu32_ps (res, m, s, _MM_FROUND_TO_NEG_INF);
+ res = _mm512_maskz_cvt_roundepu32_ps (m, s, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-2.c
new file mode 100644
index 00000000000..e9e11325146
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtudq2ps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (float) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu32_ps) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu32_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu32_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c
new file mode 100644
index 00000000000..b00c321c500
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vcvtusi2sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvtu32_sd (x, n);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c
new file mode 100644
index 00000000000..2100cbeb423
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
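+/* vcvtusi2sd writes the converted scalar to element 0 and copies the
+   upper element from the first source operand.  */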
+static void
+ __attribute__ ((noinline, unused))
+compute_vcvtusi2sd (double *s1, unsigned s2, double *r)
+{
+ r[0] = (double) s2;
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, res;
+ unsigned s2;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-24.43, -43.35);
+ s2 = 0xFEDCA987;
+
+ res.x = _mm_cvtu32_sd (s1.x, s2);
+
+ compute_vcvtusi2sd (s1.a, s2, res_ref);
+
+ if (check_union128d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-1.c
new file mode 100644
index 00000000000..097cfa27b51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtusi2sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtusi2sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile unsigned long long n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvtu64_sd (x, n);
+ x = _mm_cvt_roundu64_sd (x, n, _MM_FROUND_TO_POS_INF);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c
new file mode 100644
index 00000000000..997e21bb54d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static void
+ __attribute__ ((noinline, unused))
+compute_vcvtusi2sd (double *s1, unsigned long long s2, double *r)
+{
+ r[0] = (double) s2;
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, res;
+ unsigned long long s2;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-24.43, -43.35);
+ s2 = 0xFEDCBA9876543210;
+
+ res.x = _mm_cvtu64_sd (s1.x, s2);
+
+ compute_vcvtusi2sd (s1.a, s2, res_ref);
+
+ if (check_union128d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c
new file mode 100644
index 00000000000..93b53fd543e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtusi2ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtusi2ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvtu32_ss (x, n);
+ x = _mm_cvt_roundu32_ss (x, n, _MM_FROUND_TO_NEAREST_INT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c
new file mode 100644
index 00000000000..b5f67dd0ba0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static void
+ __attribute__ ((noinline, unused))
+compute_vcvtusi2ss (float *s1, unsigned s2, float *r)
+{
+ r[0] = (float) s2;
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, res;
+ unsigned s2;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2 = 0xFEDCA987;
+
+ res.x = _mm_cvtu32_ss (s1.x, s2);
+
+ compute_vcvtusi2ss (s1.a, s2, res_ref);
+
+ if (check_union128 (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-1.c
new file mode 100644
index 00000000000..f1f691e88d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vcvtusi2ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtusi2ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile unsigned long long n;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_cvtu64_ss (x, n);
+ x = _mm_cvt_roundu64_ss (x, n, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c
new file mode 100644
index 00000000000..eeb499aac9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c
@@ -0,0 +1,33 @@
+/* { dg-do run { target { ! { ia32 } } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+static void
+ __attribute__ ((noinline, unused))
+compute_vcvtusi2ss (float *s1, unsigned long long s2, float *r)
+{
+ r[0] = (float) s2;
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, res;
+ unsigned long long s2;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2 = 0xFEDCBA9876543210;
+
+ res.x = _mm_cvtu64_ss (s1.x, s2);
+
+ compute_vcvtusi2ss (s1.a, s2, res_ref);
+
+ if (check_union128 (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-1.c
new file mode 100644
index 00000000000..660c9566342
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_div_pd (x, x);
+ x = _mm512_mask_div_pd (x, m, x, x);
+ x = _mm512_maskz_div_pd (m, x, x);
+ x = _mm512_div_round_pd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_div_round_pd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_div_round_pd (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-2.c
new file mode 100644
index 00000000000..cceba78d926
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] / s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign + 1.0;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_div_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_div_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_div_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivps-1.c
new file mode 100644
index 00000000000..8274440f7ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_div_ps (x, x);
+ x = _mm512_mask_div_ps (x, m, x, x);
+ x = _mm512_maskz_div_ps (m, x, x);
+ x = _mm512_div_round_ps (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_div_round_ps (x, m, x, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_div_round_ps (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivps-2.c
new file mode 100644
index 00000000000..9ba177c772e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] / s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign + 1.0;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_div_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_div_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_div_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c
new file mode 100644
index 00000000000..ea52a3867aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_div_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_div_sd (m, x1, x2);
+ x1 = _mm_div_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_div_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_div_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c
new file mode 100644
index 00000000000..b37877b1bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivsd-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vdivsd (double *s1, double *s2, double *r)
+{
+ r[0] = s1[0] / s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, -4.5);
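+ /* Seed element 0 so merge masking has a defined value to keep.  */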
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_div_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_div_sd (mask, s1.x, s2.x);
+
+ compute_vdivsd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c
new file mode 100644
index 00000000000..111e1ff5d2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_div_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_div_ss (m, x1, x2);
+ x1 = _mm_div_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_div_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_div_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c
new file mode 100644
index 00000000000..f64a363755b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vdivss-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vdivss (float *s1, float *s2, float *r)
+{
+ r[0] = s1[0] / s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.22, -333.33, 444.44, -4.56);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_div_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_div_ss (mask, s1.x, s2.x);
+
+ compute_vdivss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vec-init.c b/gcc/testsuite/gcc.target/i386/avx512f-vec-init.c
new file mode 100644
index 00000000000..0628dc6b1b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vec-init.c
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+%zmm" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw" 2 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd" 1 } } */
+/* { dg-final { scan-assembler-times "vmovddup" 1 } } */
+
+#include <x86intrin.h>
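+
+/* Each function below returns a vector whose elements all equal its
+   scalar argument; at -O3 each should compile to the single broadcast
+   or vmovdqa64 instruction counted above.  */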
+
+typedef char __v64qi __attribute__ ((vector_size (64)));
+typedef short __v32hi __attribute__ ((vector_size (64)));
+
+__v64qi foo_1 (char c)
+{
+ __v64qi v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v32hi foo_2 (short c)
+{
+ __v32hi v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v16si foo_3 (int c)
+{
+ __v16si v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v8di foo_4 (long long c)
+{
+ __v8di v1 = {
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v32qi foo_5 (char c)
+{
+ __v32qi v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v16hi foo_6 (short c)
+{
+ __v16hi v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v8si foo_7 (int c)
+{
+ __v8si v1 = {
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v4di foo_8 (long long c)
+{
+ __v4di v1 = {
+ c, c, c, c
+ };
+
+ return v1;
+}
+
+
+__v16qi foo_9 (char c)
+{
+ __v16qi v1 = {
+ c, c, c, c, c, c, c, c,
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v8hi foo_10(short c)
+{
+ __v8hi v1 = {
+ c, c, c, c, c, c, c, c
+ };
+
+ return v1;
+}
+
+__v4si foo_11 (int c)
+{
+ __v4si v1 = {
+ c, c, c, c
+ };
+
+ return v1;
+}
+
+__v2di foo_12 (long long c)
+{
+ __v2di v1 = {
+ c, c
+ };
+
+ return v1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vec-unpack.c b/gcc/testsuite/gcc.target/i386/avx512f-vec-unpack.c
new file mode 100644
index 00000000000..8dcdac7b063
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vec-unpack.c
@@ -0,0 +1,131 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
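+
+/* Multiplying arrays of different element widths is expected to make
+   the vectorizer emit unpack (widening) and pack (truncating)
+   sequences between QI, HI, SI and DI vectors.  */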
+
+long long *D;
+int *S;
+short *H;
+char *Q;
+
+long long foo_unpack_1 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ D[i] *= S[i];
+
+ return D[ind];
+}
+
+long long foo_unpack_2 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ D[i] *= H[i];
+
+ return D[ind];
+}
+
+long long foo_unpack_3 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ D[i] *= Q[i];
+
+ return D[ind];
+}
+
+int foo_unpack_4 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ S[i] *= H[i];
+
+ return S[ind];
+}
+
+int foo_unpack_5 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ S[i] *= Q[i];
+
+ return S[ind];
+}
+
+short foo_unpack_6 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ H[i] *= Q[i];
+
+ return H[ind];
+}
+
+int foo_expand_1 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ S[i] *= D[i];
+
+ return S[ind];
+}
+
+short foo_expand_2 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ H[i] *= D[i];
+
+ return H[ind];
+}
+
+char foo_expand_3 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ Q[i] *= D[i];
+
+ return Q[ind];
+}
+
+short foo_expand_4 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ H[i] *= S[i];
+
+ return H[ind];
+}
+
+char foo_expand_5 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ Q[i] *= S[i];
+
+ return Q[ind];
+}
+
+char foo_expand_6 (int low, int high, int ind)
+{
+ int i;
+
+ for (i = low; i <= high; i++)
+ Q[i] *= H[i];
+
+ return Q[ind];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-1.c
new file mode 100644
index 00000000000..fc121656f20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_expand_pd (x, m, x);
+ x = _mm512_maskz_expand_pd (m, x);
+
+ x = _mm512_mask_expandloadu_pd (x, m, p);
+ x = _mm512_maskz_expandloadu_pd (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-2.c
new file mode 100644
index 00000000000..61071f41339
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vexpandpd-2.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
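+/* Expand places consecutive low source elements into the destination
+   lanes whose mask bit is set.  */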
+static void
+CALC (double *s, double *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[i] = s[k++];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double s2[SIZE];
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 123.456 * (i + 200) * sign;
+ s2[i] = 789.012 * (i + 300) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_expand_pd) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_expand_pd) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_expandloadu_pd) (res3.x, mask, s2);
+ res4.x = INTRINSIC (_maskz_expandloadu_pd) (mask, s2);
+
+ CALC (s1.a, res_ref1, mask);
+ CALC (s2, res_ref2, mask);
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref2))
+ abort ();
+
+ MASK_ZERO (d) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res4, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-1.c
new file mode 100644
index 00000000000..fcf87642b40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_expand_ps (x, m, x);
+ x = _mm512_maskz_expand_ps (m, x);
+
+ x = _mm512_mask_expandloadu_ps (x, m, p);
+ x = _mm512_maskz_expandloadu_ps (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-2.c
new file mode 100644
index 00000000000..68808de9918
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vexpandps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s, float *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[i] = s[k++];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float s2[SIZE];
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 123.456 * (i + 200) * sign;
+ s2[i] = 789.012 * (i + 300) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_expand_ps) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_expand_ps) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_expandloadu_ps) (res3.x, mask, s2);
+ res4.x = INTRINSIC (_maskz_expandloadu_ps) (mask, s2);
+
+ CALC (s1.a, res_ref1, mask);
+ CALC (s2, res_ref2, mask);
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref2))
+ abort ();
+
+ MASK_ZERO () (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res4, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-1.c
new file mode 100644
index 00000000000..b32d161ba9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m128 y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_extractf32x4_ps (x, 1);
+ y = _mm512_mask_extractf32x4_ps (y, 2, x, 1);
+ y = _mm512_maskz_extractf32x4_ps (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c
new file mode 100644
index 00000000000..26d7c3c6b7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextractf32x4-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
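+/* Reference: copy the 128-bit lane selected by the immediate into
+   res_ref; result masking is applied separately below.  */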
+void
+CALC (UNION_TYPE (AVX512F_LEN,) s1, float *res_ref, int imm)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1.a + imm * 4, 16);
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1;
+ union128 res1, res2, res3;
+ float res_ref[4];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extractf32x4_ps) (s1.x, 1);
+ res2.x = INTRINSIC (_mask_extractf32x4_ps) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extractf32x4_ps) (mask, s1.x, 1);
+ CALC (s1, res_ref, 1);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 4);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 4);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-1.c
new file mode 100644
index 00000000000..6259ac80624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vextractf64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextractf64x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vextractf64x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256d y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_extractf64x4_pd (x, 1);
+ y = _mm512_maskz_extractf64x4_pd (2, x, 1);
+ y = _mm512_mask_extractf64x4_pd (y, 2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-2.c
new file mode 100644
index 00000000000..b73044917b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextractf64x4-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include <string.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+avx512f_test (void)
+{
+ union512d s1;
+ union256d res1, res2, res3;
+ __mmask8 mask = 0xBA;
+ double res_ref[4];
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm512_extractf64x4_pd (s1.x, 0);
+ res2.x = _mm512_mask_extractf64x4_pd (res2.x, mask, s1.x, 0);
+ res3.x = _mm512_maskz_extractf64x4_pd (mask, s1.x, 0);
+
+ memset (res_ref, 0, 32);
+ memcpy (res_ref, s1.a, 32);
+
+ if (check_union256d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 4);
+ if (check_union256d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 4);
+ if (check_union256d (res3, res_ref))
+ abort ();
+
+ res1.x = _mm512_extractf64x4_pd (s1.x, 1);
+ res2.x = _mm512_mask_extractf64x4_pd (res2.x, mask, s1.x, 1);
+ res3.x = _mm512_maskz_extractf64x4_pd (mask, s1.x, 1);
+
+ memset (res_ref, 0, 32);
+ memcpy (res_ref, s1.a + 4, 32);
+
+ if (check_union256d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 4);
+ if (check_union256d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 4);
+ if (check_union256d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-1.c
new file mode 100644
index 00000000000..87c92f7b5d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_extracti32x4_epi32 (x, 1);
+ y = _mm512_mask_extracti32x4_epi32 (y, 2, x, 1);
+ y = _mm512_maskz_extracti32x4_epi32 (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c
new file mode 100644
index 00000000000..c82858dba14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextracti32x4-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
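+/* Reference: copy the 128-bit lane selected by the immediate.  */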
+void
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, int *res_ref, int imm)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1.a + imm * 4, 16);
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1;
+ union128i_d res1, res2, res3;
+ int res_ref[4];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extracti32x4_epi32) (s1.x, 1);
+ res2.x =
+ INTRINSIC (_mask_extracti32x4_epi32) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extracti32x4_epi32) (mask, s1.x, 1);
+ CALC (s1, res_ref, 1);
+
+ if (check_union128i_d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, 4);
+ if (check_union128i_d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, 4);
+ if (check_union128i_d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-1.c
new file mode 100644
index 00000000000..71268bcbe52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vextracti64x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextracti64x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vextracti64x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_extracti64x4_epi64 (x, 1);
+ y = _mm512_mask_extracti64x4_epi64 (y, 2, x, 1);
+ y = _mm512_maskz_extracti64x4_epi64 (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-2.c
new file mode 100644
index 00000000000..9753d2461f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vextracti64x4-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include <string.h>
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+
+static void
+avx512f_test (void)
+{
+ union512i_q s1;
+ union256i_q res1, res2, res3;
+ __mmask8 mask = 0xBA;
+ long long int res_ref[4];
+ int j;
+
+ for (j = 0; j < 8; j++)
+ s1.a[j] = j * j;
+
+ for (j = 0; j < 4; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+ res1.x = _mm512_extracti64x4_epi64 (s1.x, 0);
+ res2.x = _mm512_mask_extracti64x4_epi64 (res2.x, mask, s1.x, 0);
+ res3.x = _mm512_maskz_extracti64x4_epi64 (mask, s1.x, 0);
+
+ memset (res_ref, 0, 32);
+ memcpy (res_ref, s1.a, 32);
+
+ if (check_union256i_q (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, 4);
+ if (check_union256i_q (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, 4);
+ if (check_union256i_q (res3, res_ref))
+ abort ();
+
+ res1.x = _mm512_extracti64x4_epi64 (s1.x, 1);
+ res2.x = _mm512_mask_extracti64x4_epi64 (res2.x, mask, s1.x, 1);
+ res3.x = _mm512_maskz_extracti64x4_epi64 (mask, s1.x, 1);
+
+ memset (res_ref, 0, 32);
+ memcpy (res_ref, s1.a + 4, 32);
+
+ if (check_union256i_q (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, 4);
+ if (check_union256i_q (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, 4);
+ if (check_union256i_q (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-1.c
new file mode 100644
index 00000000000..e452ebcffd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfixupimmpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2;
+volatile __m512i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fixupimm_pd (x1, x2, y, 3);
+ x1 = _mm512_mask_fixupimm_pd (x1, m, x2, y, 3);
+ x1 = _mm512_maskz_fixupimm_pd (m, x1, x2, y, 3);
+ x1 = _mm512_fixupimm_round_pd (x1, x2, y, 3, _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_fixupimm_round_pd (x1, m, x2, y, 3, _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_fixupimm_round_pd (m, x1, x2, y, 3, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c
new file mode 100644
index 00000000000..3ecee8cbd18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c
@@ -0,0 +1,113 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
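+/* Reference model of the fixup token table: each 4-bit control field
+   selects one of 16 responses (pass the source through, quiet NaN,
+   +/-INFINITY, 0.0, -1.0, 1.0, 1/2, 90.0, pi/2, +/-MAXDOUBLE).  */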
+static void
+CALC (double *r, double src, long long tbl)
+{
+ switch (tbl & 0xf)
+ {
+ case 0:
+ *r = src;
+ break;
+ case 1:
+ *r = src;
+ break;
+ case 2:
+ *r = signbit (src) ? -NAN : NAN;
+ break;
+ case 3:
+ *r = -NAN;
+ break;
+ case 4:
+ *r = -INFINITY;
+ break;
+ case 5:
+ *r = INFINITY;
+ break;
+ case 6:
+ *r = signbit (src) ? -INFINITY : INFINITY;
+ break;
+ case 7:
+ *r = 1.0 / -INFINITY;
+ break;
+ case 8:
+ *r = 0.0;
+ break;
+ case 9:
+ *r = -1.0;
+ break;
+ case 10:
+ *r = 1.0;
+ break;
+ case 11:
+ *r = 1.0 / 2.0;
+ break;
+ case 12:
+ *r = 90.0;
+ break;
+ case 13:
+ *r = M_PI_2;
+ break;
+ case 14:
+ *r = MAXDOUBLE;
+ break;
+ case 15:
+ *r = -MAXDOUBLE;
+ break;
+ default:
+ abort ();
+ }
+}
+
+static void
+TEST (void)
+{
+ int i, j;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, s1;
+ UNION_TYPE (AVX512F_LEN, i_q) s2;
+ double res_ref[SIZE];
+
+ double vals[2] = { -10, 10 };
+ int controls[8] = {0x11111111, 0x77777777, 0x77777777, 0x88888888,
+ 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc};
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = vals[i];
+ s2.a[j] = controls[j];
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+
+ CALC (&res_ref[j], s1.a[j], s2.a[j]);
+ }
+
+ res1.x = INTRINSIC (_fixupimm_pd) (res1.x, s1.x, s2.x, 0);
+ res2.x = INTRINSIC (_mask_fixupimm_pd) (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = INTRINSIC (_maskz_fixupimm_pd) (mask, res3.x, s1.x, s2.x, 0);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-1.c
new file mode 100644
index 00000000000..5cf045df342
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfixupimmps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2;
+volatile __m512i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fixupimm_ps (x1, x2, y, 3);
+ x1 = _mm512_mask_fixupimm_ps (x1, m, x2, y, 3);
+ x1 = _mm512_maskz_fixupimm_ps (m, x1, x2, y, 3);
+ x1 = _mm512_fixupimm_round_ps (x1, x2, y, 3, _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_fixupimm_round_ps (x1, m, x2, y, 3, _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_fixupimm_round_ps (m, x1, x2, y, 3, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c
new file mode 100644
index 00000000000..4ae912b7f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
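+/* Same fixup token table as in the vfixupimmpd-2 test, in single
+   precision.  */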
+static void
+CALC (float *r, float src, int tbl)
+{
+ switch (tbl & 0xf)
+ {
+ case 0:
+ *r = src;
+ break;
+ case 1:
+ *r = src;
+ break;
+ case 2:
+ *r = signbit (src) ? -NAN : NAN;
+ break;
+ case 3:
+ *r = -NAN;
+ break;
+ case 4:
+ *r = -INFINITY;
+ break;
+ case 5:
+ *r = INFINITY;
+ break;
+ case 6:
+ *r = signbit (src) ? -INFINITY : INFINITY;
+ break;
+ case 7:
+ *r = 1.0 / -INFINITY;
+ break;
+ case 8:
+ *r = 0.0;
+ break;
+ case 9:
+ *r = -1.0;
+ break;
+ case 10:
+ *r = 1.0;
+ break;
+ case 11:
+ *r = 1.0 / 2.0;
+ break;
+ case 12:
+ *r = 90.0;
+ break;
+ case 13:
+ *r = M_PI_2;
+ break;
+ case 14:
+ *r = MAXFLOAT;
+ break;
+ case 15:
+ *r = -MAXFLOAT;
+ break;
+ default:
+ abort ();
+ }
+}
+
+static void
+TEST (void)
+{
+ int i, j;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, s1;
+ UNION_TYPE (AVX512F_LEN, i_d) s2;
+ float res_ref[SIZE];
+
+ float vals[2] = { -10, 10 };
+ int controls[16] = { 0x11111111,
+ 0x77777777, 0x88888888, 0x99999999,
+ 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc,
+ 0x77777777, 0x88888888, 0x99999999,
+ 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc,
+ 0xdddddddd, 0xeeeeeeee, 0xffffffff
+ };
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = vals[i];
+ s2.a[j] = controls[j];
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+
+ CALC (&res_ref[j], s1.a[j], s2.a[j]);
+ }
+
+ res1.x = INTRINSIC (_fixupimm_ps) (res1.x, s1.x, s2.x, 0);
+ res2.x = INTRINSIC (_mask_fixupimm_ps) (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = INTRINSIC (_maskz_fixupimm_ps) (mask, res3.x, s1.x, s2.x, 0);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-1.c
new file mode 100644
index 00000000000..76676afef82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfixupimmsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_fixupimm_sd (x, x, y, 3);
+ x = _mm_mask_fixupimm_sd (x, m, x, y, 3);
+ x = _mm_maskz_fixupimm_sd (m, x, x, y, 3);
+ x = _mm_fixupimm_round_sd (x, x, y, 3, _MM_FROUND_NO_EXC);
+ x = _mm_mask_fixupimm_round_sd (x, m, x, y, 3, _MM_FROUND_NO_EXC);
+ x = _mm_maskz_fixupimm_round_sd (m, x, x, y, 3, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c
new file mode 100644
index 00000000000..ebd288ed268
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+#include <math.h>
+#include <values.h>
+#include "avx512f-mask-type.h"
+
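+/* Scalar-double variant of the fixup token table used by the packed
+   tests.  */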
+void
+compute_fixupimmpd (double *r, double src, long long tbl)
+{
+ switch (tbl & 0xf)
+ {
+ case 0:
+ *r = src;
+ break;
+ case 1:
+ *r = src;
+ break;
+ case 2:
+ *r = signbit (src) ? -NAN : NAN;
+ break;
+ case 3:
+ *r = -NAN;
+ break;
+ case 4:
+ *r = -INFINITY;
+ break;
+ case 5:
+ *r = INFINITY;
+ break;
+ case 6:
+ *r = signbit (src) ? -INFINITY : INFINITY;
+ break;
+ case 7:
+ *r = 1.0 / -INFINITY;
+ break;
+ case 8:
+ *r = 0.0;
+ break;
+ case 9:
+ *r = -1.0;
+ break;
+ case 10:
+ *r = 1.0;
+ break;
+ case 11:
+ *r = 1.0 / 2.0;
+ break;
+ case 12:
+ *r = 90.0;
+ break;
+ case 13:
+ *r = M_PI_2;
+ break;
+ case 14:
+ *r = MAXDOUBLE;
+ break;
+ case 15:
+ *r = -MAXDOUBLE;
+ break;
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, res1, res2, res3;
+ union128i_q s2;
+ double res_ref[2];
+ int i, j;
+
+ double vals[2] = { -10, 10 };
+ int controls[10] = { 0x11111111,
+ 0x77777777, 0x88888888, 0x99999999,
+ 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc,
+ 0xdddddddd, 0xeeeeeeee, 0xffffffff
+ };
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < 2; i++)
+ {
+ s1.a[0] = vals[i];
+ s1.a[1] = 1.0;
+ s2.a[1] = 1.0;
+
+ res_ref[0] = 1.0;
+ res_ref[1] = 1.0;
+ res1.a[0] = res2.a[0] = res3.a[0] = DEFAULT_VALUE;
+ res1.a[1] = res2.a[1] = res3.a[1] = DEFAULT_VALUE;
+
+ for (j = 0; j < 10; j++)
+ {
+ s2.a[0] = controls[j];
+ compute_fixupimmpd (&res_ref[0], s1.a[0], s2.a[0]);
+
+ res1.x = _mm_fixupimm_sd (res1.x, s1.x, s2.x, 0);
+ res2.x = _mm_mask_fixupimm_sd (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = _mm_maskz_fixupimm_sd (mask, res3.x, s1.x, s2.x, 0);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res3, res_ref))
+ abort ();
+ }
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-1.c
new file mode 100644
index 00000000000..435befbfa6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfixupimmss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_fixupimm_ss (x, x, y, 3);
+ x = _mm_mask_fixupimm_ss (x, m, x, y, 3);
+ x = _mm_maskz_fixupimm_ss (m, x, x, y, 3);
+ x = _mm_fixupimm_round_ss (x, x, y, 3, _MM_FROUND_NO_EXC);
+ x = _mm_mask_fixupimm_round_ss (x, m, x, y, 3, _MM_FROUND_NO_EXC);
+ x = _mm_maskz_fixupimm_round_ss (m, x, x, y, 3, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c
new file mode 100644
index 00000000000..50830b8bd36
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-helper.h"
+#include <math.h>
+#include <values.h>
+#include "avx512f-mask-type.h"
+
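+/* Scalar-single variant of the same fixup token table.  */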
+void
+compute_fixupimmps (float *r, float src, int tbl)
+{
+ switch (tbl & 0xf)
+ {
+ case 0:
+ *r = src;
+ break;
+ case 1:
+ *r = src;
+ break;
+ case 2:
+ *r = signbit (src) ? -NAN : NAN;
+ break;
+ case 3:
+ *r = -NAN;
+ break;
+ case 4:
+ *r = -INFINITY;
+ break;
+ case 5:
+ *r = INFINITY;
+ break;
+ case 6:
+ *r = signbit (src) ? -INFINITY : INFINITY;
+ break;
+ case 7:
+ *r = 1.0 / -INFINITY;
+ break;
+ case 8:
+ *r = 0.0;
+ break;
+ case 9:
+ *r = -1.0;
+ break;
+ case 10:
+ *r = 1.0;
+ break;
+ case 11:
+ *r = 1.0 / 2.0;
+ break;
+ case 12:
+ *r = 90.0;
+ break;
+ case 13:
+ *r = M_PI_2;
+ break;
+ case 14:
+ *r = MAXFLOAT;
+ break;
+ case 15:
+ *r = -MAXFLOAT;
+ break;
+ default:
+ abort ();
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, res1, res2, res3;
+ union128i_d s2;
+ float res_ref[4];
+ int i, j, k;
+
+ float vals[2] = { -10, 10 };
+ int controls[10] = { 0x11111111,
+ 0x77777777, 0x88888888, 0x99999999,
+ 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc,
+ 0xdddddddd, 0xeeeeeeee, 0xffffffff
+ };
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < 2; i++)
+ {
+ s1.a[0] = vals[i];
+ res1.a[0] = res2.a[0] = res3.a[0] = DEFAULT_VALUE;
+ for (k = 1; k < 4; k++)
+ {
+ s1.a[k] = k;
+ s2.a[k] = k;
+ res_ref[k] = k;
+ res1.a[k] = res2.a[k] = res3.a[k] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 10; j++)
+ {
+ s2.a[0] = controls[j];
+ compute_fixupimmps (&res_ref[0], s1.a[0], s2.a[0]);
+
+ res1.x = _mm_fixupimm_ss (res1.x, s1.x, s2.x, 0);
+ res2.x = _mm_mask_fixupimm_ss (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = _mm_maskz_fixupimm_ss (mask, res3.x, s1.x, s2.x, 0);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res3, res_ref))
+ abort ();
+ }
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-1.c
new file mode 100644
index 00000000000..c45930c6b20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmadd_pd (x1, x2, x3);
+ x1 = _mm512_mask_fmadd_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmadd_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmadd_pd (m, x1, x2, x3);
+ x1 = _mm512_fmadd_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmadd_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmadd_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmadd_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-2.c
new file mode 100644
index 00000000000..a863767a0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXpd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmadd_pd) (s1.x, s2.x, s3.x);
+#endif
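+ /* _mask_ merges against the first operand while _mask3_ merges against
+    the third, so the operands are rotated to keep DEFAULT_VALUE in the
+    merged slot.  */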
+ res2.x = INTRINSIC (_mask_fmadd_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmadd_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmadd_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-1.c
new file mode 100644
index 00000000000..ddeddb21b67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmadd_ps (x1, x2, x3);
+ x1 = _mm512_mask_fmadd_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmadd_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmadd_ps (m, x1, x2, x3);
+ x1 = _mm512_fmadd_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmadd_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmadd_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmadd_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-2.c
new file mode 100644
index 00000000000..26f9e572e42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmadd_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmadd_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmadd_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmadd_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c
new file mode 100644
index 00000000000..ea1f9f5c12a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fmadd_sd (a, m, b, c);
+ c = _mm_mask3_fmadd_sd (a, b, c, m);
+ a = _mm_maskz_fmadd_sd (m, a, b, c);
+ a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c
new file mode 100644
index 00000000000..cdf74ed8f2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
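+/* Scalar reference: only element 0 is computed; the upper element of
+   the result keeps DEFAULT_VALUE, matching the pass-through upper half.  */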
+static void
+compute_vfmaddsd (double *r, double *s1, double *s2, double *s3)
+{
+ r[0] = s1[0] * s2[0] + s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128d res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref1[2], res_ref2[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfmaddsd (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfmaddsd (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fmadd_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fmadd_sd (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fmadd_sd (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, 1);
+ if (check_fp_union128d (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c
new file mode 100644
index 00000000000..737556475d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fmadd_ss (a, m, b, c);
+ c = _mm_mask3_fmadd_ss (a, b, c, m);
+ a = _mm_maskz_fmadd_ss (m, a, b, c);
+ a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c
new file mode 100644
index 00000000000..fd8fedc5cac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfmaddss (float *r, float *s1, float *s2, float *s3)
+{
+ r[0] = s1[0] * s2[0] + s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128 res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref1[4], res_ref2[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfmaddss (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfmaddss (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fmadd_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fmadd_ss (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fmadd_ss (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE ()(res_ref1, mask, 1);
+ if (check_fp_union128 (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE ()(res_ref2, mask, 1);
+ if (check_fp_union128 (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO ()(res_ref1, mask, 1);
+ if (check_fp_union128 (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-1.c
new file mode 100644
index 00000000000..7f4ab7bdd1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmaddsub_pd (x1, x2, x3);
+ x1 = _mm512_mask_fmaddsub_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmaddsub_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmaddsub_pd (m, x1, x2, x3);
+ x1 = _mm512_fmaddsub_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmaddsub_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmaddsub_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmaddsub_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-2.c
new file mode 100644
index 00000000000..efc764cb812
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXpd-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
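+/* Reference: fmaddsub alternates per element, subtracting s3 in the
+   even-indexed lanes and adding it in the odd-indexed ones.  */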
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (i % 2)
+ r[i] = s1[i] * s2[i] + s3[i];
+ else
+ r[i] = s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmaddsub_pd) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmaddsub_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmaddsub_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmaddsub_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-1.c
new file mode 100644
index 00000000000..73936c71caa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmaddsub_ps (x1, x2, x3);
+ x1 = _mm512_mask_fmaddsub_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmaddsub_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmaddsub_ps (m, x1, x2, x3);
+ x1 = _mm512_fmaddsub_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmaddsub_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmaddsub_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmaddsub_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-2.c
new file mode 100644
index 00000000000..0b764cbdebd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmaddsubXXXps-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (i % 2)
+ r[i] = s1[i] * s2[i] + s3[i];
+ else
+ r[i] = s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmaddsub_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmaddsub_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmaddsub_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmaddsub_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-1.c
new file mode 100644
index 00000000000..2ad15573290
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmsub_pd (x1, x2, x3);
+ x1 = _mm512_mask_fmsub_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmsub_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmsub_pd (m, x1, x2, x3);
+ x1 = _mm512_fmsub_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmsub_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmsub_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmsub_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-2.c
new file mode 100644
index 00000000000..ff2e3e03f5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXpd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmsub_pd) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmsub_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmsub_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmsub_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-1.c
new file mode 100644
index 00000000000..81afaf59092
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmsub_ps (x1, x2, x3);
+ x1 = _mm512_mask_fmsub_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmsub_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmsub_ps (m, x1, x2, x3);
+ x1 = _mm512_fmsub_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmsub_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmsub_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmsub_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-2.c
new file mode 100644
index 00000000000..5f0fb941278
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmsub_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmsub_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmsub_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmsub_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c
new file mode 100644
index 00000000000..1fdd07a5b14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fmsub_sd (a, m, b, c);
+ c = _mm_mask3_fmsub_sd (a, b, c, m);
+ a = _mm_maskz_fmsub_sd (m, a, b, c);
+ a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c
new file mode 100644
index 00000000000..75a134241cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfmsubsd (double *r, double *s1, double *s2, double *s3)
+{
+ r[0] = s1[0] * s2[0] - s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128d res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref1[2], res_ref2[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfmsubsd (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfmsubsd (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fmsub_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fmsub_sd (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fmsub_sd (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, 1);
+ if (check_fp_union128d (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c
new file mode 100644
index 00000000000..b0abe2240b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fmsub_ss (a, m, b, c);
+ c = _mm_mask3_fmsub_ss (a, b, c, m);
+ a = _mm_maskz_fmsub_ss (m, a, b, c);
+ a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c
new file mode 100644
index 00000000000..ad8fc2342d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfmsubss (float *r, float *s1, float *s2, float *s3)
+{
+ r[0] = s1[0] * s2[0] - s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128 res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref1[4], res_ref2[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfmsubss (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfmsubss (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fmsub_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fmsub_ss (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fmsub_ss (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE () (res_ref1, mask, 1);
+ if (check_fp_union128 (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, 1);
+ if (check_fp_union128 (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, 1);
+ if (check_fp_union128 (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-1.c
new file mode 100644
index 00000000000..1ff3f2b7536
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
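+/* Each intrinsic call below must emit exactly one vfmsubadd instruction; the patterns above pin down the masking and rounding-operand syntax. */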
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmsubadd_pd (x1, x2, x3);
+ x1 = _mm512_mask_fmsubadd_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmsubadd_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmsubadd_pd (m, x1, x2, x3);
+ x1 = _mm512_fmsubadd_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmsubadd_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmsubadd_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmsubadd_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-2.c
new file mode 100644
index 00000000000..5cb14d2b498
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXpd-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
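+/* Reference: fmsubadd subtracts s3 in odd-indexed elements and adds it in even-indexed ones. */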
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (i % 2)
+ r[i] = s1[i] * s2[i] - s3[i];
+ else
+ r[i] = s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
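+/* Only the 512-bit run exercises the unmasked intrinsic. */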
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmsubadd_pd) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmsubadd_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmsubadd_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmsubadd_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-1.c
new file mode 100644
index 00000000000..283c0af19f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fmsubadd_ps (x1, x2, x3);
+ x1 = _mm512_mask_fmsubadd_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fmsubadd_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fmsubadd_ps (m, x1, x2, x3);
+ x1 = _mm512_fmsubadd_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fmsubadd_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fmsubadd_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fmsubadd_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-2.c
new file mode 100644
index 00000000000..b6fb4c5a5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfmsubaddXXXps-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (i % 2)
+ r[i] = s1[i] * s2[i] - s3[i];
+ else
+ r[i] = s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fmsubadd_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fmsubadd_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fmsubadd_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fmsubadd_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-1.c
new file mode 100644
index 00000000000..b08d7e18891
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fnmadd_pd (x1, x2, x3);
+ x1 = _mm512_mask_fnmadd_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fnmadd_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fnmadd_pd (m, x1, x2, x3);
+ x1 = _mm512_fnmadd_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fnmadd_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fnmadd_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fnmadd_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-2.c
new file mode 100644
index 00000000000..dea71571877
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXpd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
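+/* Reference: fnmadd computes -(s1 * s2) + s3 in every element. */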
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = -s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fnmadd_pd) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fnmadd_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fnmadd_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fnmadd_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-1.c
new file mode 100644
index 00000000000..8b4447c832f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fnmadd_ps (x1, x2, x3);
+ x1 = _mm512_mask_fnmadd_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fnmadd_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fnmadd_ps (m, x1, x2, x3);
+ x1 = _mm512_fnmadd_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fnmadd_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fnmadd_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fnmadd_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-2.c
new file mode 100644
index 00000000000..9ef5e36682c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = -s1[i] * s2[i] + s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fnmadd_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fnmadd_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fnmadd_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fnmadd_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c
new file mode 100644
index 00000000000..0dd7459b5bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fnmadd_sd (a, m, b, c);
+ c = _mm_mask3_fnmadd_sd (a, b, c, m);
+ a = _mm_maskz_fnmadd_sd (m, a, b, c);
+ a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fnmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fnmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fnmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c
new file mode 100644
index 00000000000..effe491ad9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfnmaddsd (double *r, double *s1, double *s2, double *s3)
+{
+ r[0] = -s1[0] * s2[0] + s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128d res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref1[2], res_ref2[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfnmaddsd (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfnmaddsd (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fnmadd_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fnmadd_sd (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fnmadd_sd (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, 1);
+ if (check_fp_union128d (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c
new file mode 100644
index 00000000000..416d2371806
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fnmadd_ss (a, m, b, c);
+ c = _mm_mask3_fnmadd_ss (a, b, c, m);
+ a = _mm_maskz_fnmadd_ss (m, a, b, c);
+ a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fnmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fnmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fnmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c
new file mode 100644
index 00000000000..676f3f08f67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfnmaddss (float *r, float *s1, float *s2, float *s3)
+{
+ r[0] = -s1[0] * s2[0] + s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128 res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref1[4], res_ref2[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfnmaddss (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfnmaddss (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fnmadd_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fnmadd_ss (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fnmadd_ss (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE () (res_ref1, mask, 1);
+ if (check_fp_union128 (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, 1);
+ if (check_fp_union128 (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, 1);
+ if (check_fp_union128 (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-1.c
new file mode 100644
index 00000000000..a0776430d58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fnmsub_pd (x1, x2, x3);
+ x1 = _mm512_mask_fnmsub_pd (x1, m, x2, x3);
+ x3 = _mm512_mask3_fnmsub_pd (x1, x2, x3, m);
+ x1 = _mm512_maskz_fnmsub_pd (m, x1, x2, x3);
+ x1 = _mm512_fnmsub_round_pd (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fnmsub_round_pd (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fnmsub_round_pd (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fnmsub_round_pd (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-2.c
new file mode 100644
index 00000000000..d46ca0242ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXpd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
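+/* Reference: fnmsub computes -(s1 * s2) - s3 in every element. */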
+static void
+CALC (double *s1, double *s2, double *s3, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = -s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fnmsub_pd) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fnmsub_pd) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fnmsub_pd) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fnmsub_pd) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-1.c
new file mode 100644
index 00000000000..b863fb1bbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x1, x2, x3;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_fnmsub_ps (x1, x2, x3);
+ x1 = _mm512_mask_fnmsub_ps (x1, m, x2, x3);
+ x3 = _mm512_mask3_fnmsub_ps (x1, x2, x3, m);
+ x1 = _mm512_maskz_fnmsub_ps (m, x1, x2, x3);
+ x1 = _mm512_fnmsub_round_ps (x1, x2, x3, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm512_mask_fnmsub_round_ps (x1, m, x2, x3, _MM_FROUND_TO_NEG_INF);
+ x3 = _mm512_mask3_fnmsub_round_ps (x1, x2, x3, m, _MM_FROUND_TO_POS_INF);
+ x1 = _mm512_maskz_fnmsub_round_ps (m, x1, x2, x3, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-2.c
new file mode 100644
index 00000000000..ff3fadfeafe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, float *s2, float *s3, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = -s1[i] * s2[i] - s3[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, s3, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 56.78 * (i + 1) * sign;
+ s3.a[i] = 90.12 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_fnmsub_ps) (s1.x, s2.x, s3.x);
+#endif
+ res2.x = INTRINSIC (_mask_fnmsub_ps) (s1.x, mask, s2.x, s3.x);
+ res3.x = INTRINSIC (_mask3_fnmsub_ps) (s2.x, s3.x, s1.x, mask);
+ res4.x = INTRINSIC (_maskz_fnmsub_ps) (mask, s1.x, s2.x, s3.x);
+
+ CALC (s1.a, s2.a, s3.a, res_ref1);
+ CALC (s2.a, s3.a, s1.a, res_ref2);
+
+#if AVX512F_LEN == 512
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res1, res_ref1, 0.0001))
+ abort ();
+#endif
+
+ MASK_MERGE () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res2, res_ref1, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res3, res_ref2, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, ) (res4, res_ref1, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c
new file mode 100644
index 00000000000..cdc93f02347
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231sd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fnmsub_sd (a, m, b, c);
+ c = _mm_mask3_fnmsub_sd (a, b, c, m);
+ a = _mm_maskz_fnmsub_sd (m, a, b, c);
+ a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fnmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fnmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fnmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c
new file mode 100644
index 00000000000..254a783e6d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfnmsubsd (double *r, double *s1, double *s2, double *s3)
+{
+ r[0] = -s1[0] * s2[0] - s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128d res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref1[2], res_ref2[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfnmsubsd (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfnmsubsd (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fnmsub_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fnmsub_sd (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fnmsub_sd (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, 1);
+ if (check_fp_union128d (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, 1);
+ if (check_fp_union128d (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c
new file mode 100644
index 00000000000..ec3c33e2ab2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ a = _mm_mask_fnmsub_ss (a, m, b, c);
+ c = _mm_mask3_fnmsub_ss (a, b, c, m);
+ a = _mm_maskz_fnmsub_ss (m, a, b, c);
+ a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
+ a = _mm_mask_fnmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF);
+ c = _mm_mask3_fnmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF);
+ a = _mm_maskz_fnmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c
new file mode 100644
index 00000000000..042d07d176c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vfnmsubss (float *r, float *s1, float *s2, float *s3)
+{
+ r[0] = -s1[0] * s2[0] - s3[0];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ union128 res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref1[4], res_ref2[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ res_ref1[i] = res_ref2[i] = DEFAULT_VALUE;
+ res1.a[i] = res2.a[i] = res3.a[i] = DEFAULT_VALUE;
+ src1.a[i] = i * i - 8.179;
+ src2.a[i] = 2.45 - i;
+ }
+
+ compute_vfnmsubss (res_ref1, res1.a, src1.a, src2.a);
+ compute_vfnmsubss (res_ref2, src1.a, src2.a, res2.a);
+
+ res1.x = _mm_mask_fnmsub_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_mask3_fnmsub_ss (src1.x, src2.x, res2.x, mask);
+ res3.x = _mm_maskz_fnmsub_ss (mask, res3.x, src1.x, src2.x);
+
+ MASK_MERGE () (res_ref1, mask, 1);
+ if (check_fp_union128 (res1, res_ref1))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, 1);
+ if (check_fp_union128 (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, 1);
+ if (check_fp_union128 (res3, res_ref1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-1.c
new file mode 100644
index 00000000000..3d899ea2b61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_getexp_pd (x);
+ x = _mm512_mask_getexp_pd (x, m, x);
+ x = _mm512_maskz_getexp_pd (m, x);
+ x = _mm512_getexp_round_pd (x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_getexp_round_pd (x, m, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_getexp_round_pd (m, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-2.c
new file mode 100644
index 00000000000..a18818fa603
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexppd-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+
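+/* Reference: getexp extracts floor(log2(x)); log(x)/log(2) is log2(x), and the inputs below are non-negative. */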
+static void
+CALC (double *s, double *r)
+{
+ int i = 0;
+ for (i = 0; i < SIZE; i++)
+ r[i] = floor (log (s[i]) / log (2));
+}
+
+void static
+TEST (void)
+{
+ int j;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, s;
+ double res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s.a[j] = j * (j + 12.0231);
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_getexp_pd) (s.x);
+ res2.x = INTRINSIC (_mask_getexp_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_getexp_pd) (mask, s.x);
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-1.c
new file mode 100644
index 00000000000..fb5674d702b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_getexp_ps (x);
+ x = _mm512_mask_getexp_ps (x, m, x);
+ x = _mm512_maskz_getexp_ps (m, x);
+ x = _mm512_getexp_round_ps (x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_getexp_round_ps (x, m, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_getexp_round_ps (m, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-2.c
new file mode 100644
index 00000000000..b665814beca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpps-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+
+static void
+CALC (float *s, float *r)
+{
+ int i = 0;
+ for (i = 0; i < SIZE; i++)
+ r[i] = floor (log (s[i]) / log (2));
+}
+
+void static
+TEST (void)
+{
+ int j;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, s;
+ float res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s.a[j] = j * (j + 12.0231);
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_getexp_ps) (s.x);
+ res2.x = INTRINSIC (_mask_getexp_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_getexp_ps) (mask, s.x);
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c
new file mode 100644
index 00000000000..a56c6fae754
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_getexp_sd (x, x);
+ x = _mm_mask_getexp_sd (x, m, x, x);
+ x = _mm_maskz_getexp_sd (m, x, x);
+ x = _mm_getexp_round_sd (x, x, _MM_FROUND_NO_EXC);
+ x = _mm_mask_getexp_round_sd (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm_maskz_getexp_round_sd (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c
new file mode 100644
index 00000000000..90729a3c85c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpsd-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vgetexpsd (double *s, double *r)
+{
+ r[0] = floor (log (s[0]) / log (2));
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128d res1, res2, res3, s1;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 5.0 - i;
+ res_ref[i] = s1.a[i];
+ }
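+ /* Element 1 of the reference stays s1.a[1]: the scalar op copies the upper element from its first source. */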
+
+ res1.x = _mm_getexp_sd (s1.x, s1.x);
+ res2.x = _mm_mask_getexp_sd (res2.x, mask, s1.x, s1.x);
+ res3.x = _mm_maskz_getexp_sd (mask, s1.x, s1.x);
+
+ compute_vgetexpsd (s1.a, res_ref);
+
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c
new file mode 100644
index 00000000000..e16308c8d45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_getexp_ss (x, x);
+ x = _mm_mask_getexp_ss (x, m, x, x);
+ x = _mm_maskz_getexp_ss (m, x, x);
+ x = _mm_getexp_round_ss (x, x, _MM_FROUND_NO_EXC);
+ x = _mm_mask_getexp_round_ss (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm_maskz_getexp_round_ss (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c
new file mode 100644
index 00000000000..385165d0688
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetexpss-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vgetexpss (float *s, float *r)
+{
+ r[0] = floor (log (s[0]) / log (2));
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128 res1, res2, res3, s1;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 5.0 - i;
+ res_ref[i] = s1.a[i];
+ }
+
+ res1.x = _mm_getexp_ss (s1.x, s1.x);
+ res2.x = _mm_mask_getexp_ss (res2.x, mask, s1.x, s1.x);
+ res3.x = _mm_maskz_getexp_ss (mask, s1.x, s1.x);
+
+ compute_vgetexpss (s1.a, res_ref);
+
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-1.c
new file mode 100644
index 00000000000..b19846d17e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x, y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_getmant_pd (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x =
+ _mm512_mask_getmant_pd (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x =
+ _mm512_maskz_getmant_pd (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm512_getmant_round_pd (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+ _MM_FROUND_NO_EXC);
+ x =
+ _mm512_mask_getmant_round_pd (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+ x =
+ _mm512_maskz_getmant_round_pd (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-2.c
new file mode 100644
index 00000000000..2d2d5c8d6f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantpd-2.c
@@ -0,0 +1,110 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <math.h>
+
+#ifndef GET_NORM_MANT
+#define GET_NORM_MANT
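+/* Software model of vgetmant: split the IEEE double into sign, exponent, and fraction, renormalize denormals, then force the exponent so the mantissa lands in the interval selected by interv; signctrl controls the sign handling. */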
+double
+get_norm_mant (double source, int signctrl, int interv)
+{
+ long long dest, src, sign, exp, fraction;
+ src = *(long long *) &source;
+ sign = (signctrl & 0x1) ? 0 : (src >> 63);
+ exp = (src & 0x7ff0000000000000) >> 52;
+ fraction = (src & 0xfffffffffffff);
+
+ if (isnan (source))
+ return signbit (source) ? -NAN : NAN;
+ if (source == 0.0 || source == -0.0 || isinf (source))
+ return sign ? -1.0 : 1.0;
+ if (signbit (source) && (signctrl & 0x2))
+ return -NAN;
+ if (!isnormal (source))
+ {
+ src = (src & 0xfff7ffffffffffff);
+ exp = 0x3ff;
+ while (!(src & 0x8000000000000))
+ {
+ src += fraction & 0x8000000000000;
+ fraction = fraction << 1;
+ exp--;
+ }
+ }
+
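+ /* interv encodes the target interval: 0 -> [1,2), 1 -> [1/2,2), 2 -> [1/2,1), 3 -> [3/4,3/2). */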
+ switch (interv)
+ {
+ case 0:
+ exp = 0x3ff;
+ break;
+ case 1:
+ exp = ((exp - 0x3ff) & 0x1) ? 0x3fe : 0x3ff;
+ break;
+ case 2:
+ exp = 0x3fe;
+ break;
+ case 3:
+ exp = (fraction & 0x8000000000000) ? 0x3fe : 0x3ff;
+ break;
+ default:
+ abort ();
+ }
+
+ dest = (sign << 63) | (exp << 52) | fraction;
+ return *(double *) &dest;
+}
+#endif
+
+static void
+CALC (double *r, double *s, int interv, int signctrl)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = get_norm_mant (s[i], signctrl, interv);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int interv = _MM_MANT_NORM_p5_1;
+ int signctrl = _MM_MANT_SIGN_src;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_getmant_pd) (src.x, interv, signctrl);
+ res2.x =
+ INTRINSIC (_mask_getmant_pd) (res2.x, mask, src.x, interv,
+ signctrl);
+ res3.x =
+ INTRINSIC (_maskz_getmant_pd) (mask, src.x, interv, signctrl);
+
+ CALC (res_ref, src.a, interv, signctrl);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-1.c
new file mode 100644
index 00000000000..a3ce09e97c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x, y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_getmant_ps (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x =
+ _mm512_mask_getmant_ps (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x =
+ _mm512_maskz_getmant_ps (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm512_getmant_round_ps (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+ _MM_FROUND_NO_EXC);
+ x =
+ _mm512_mask_getmant_round_ps (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+ x =
+ _mm512_maskz_getmant_round_ps (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-2.c
new file mode 100644
index 00000000000..ad87f70bbdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantps-2.c
@@ -0,0 +1,110 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include <math.h>
+
+#ifndef GET_NORM_MANT
+#define GET_NORM_MANT
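+/* Same vgetmant model as the double flavor, applied to the IEEE single-precision fields. */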
+float
+get_norm_mant (float source, int signctrl, int interv)
+{
+ int dest, src, sign, exp, fraction;
+ src = *(int *) &source;
+ sign = (signctrl & 0x1) ? 0 : (src >> 31);
+ exp = (src & 0x7f800000) >> 23;
+ fraction = (src & 0x7fffff);
+
+ if (isnan (source))
+ return signbit (source) ? -NAN : NAN;
+ if (source == 0.0 || source == -0.0 || isinf (source))
+ return sign ? -1.0 : 1.0;
+ if (signbit (source) && (signctrl & 0x2))
+ return -NAN;
+ if (!isnormal (source))
+ {
+ src = (src & 0xffbfffff);
+ exp = 0x7f;
+ while (!(src & 0x400000))
+ {
+ src += fraction & 0x400000;
+ fraction = fraction << 1;
+ exp--;
+ }
+ }
+
+ switch (interv)
+ {
+ case 0:
+ exp = 0x7f;
+ break;
+ case 1:
+ exp = ((exp - 0x7f) & 0x1) ? 0x7e : 0x7f;
+ break;
+ case 2:
+ exp = 0x7e;
+ break;
+ case 3:
+ exp = (fraction & 0x400000) ? 0x7e : 0x7f;
+ break;
+ default:
+ abort ();
+ }
+
+ dest = (sign << 31) | (exp << 23) | fraction;
+ return *(float *) &dest;
+}
+#endif
+
+static void
+CALC (float *r, float *s, int interv, int signctrl)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = get_norm_mant (s[i], signctrl, interv);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int interv = _MM_MANT_NORM_p5_1;
+ int signctrl = _MM_MANT_SIGN_src;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_getmant_ps) (src.x, interv, signctrl);
+ res2.x =
+ INTRINSIC (_mask_getmant_ps) (res2.x, mask, src.x, interv,
+ signctrl);
+ res3.x =
+ INTRINSIC (_maskz_getmant_ps) (mask, src.x, interv, signctrl);
+
+ CALC (res_ref, src.a, interv, signctrl);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c
new file mode 100644
index 00000000000..06e9b639101
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_getmant_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x =
+ _mm_mask_getmant_sd (x, m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x =
+ _mm_maskz_getmant_sd (m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm_getmant_round_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+ _MM_FROUND_NO_EXC);
+ x =
+ _mm_mask_getmant_round_sd (x, m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+ x =
+ _mm_maskz_getmant_round_sd (m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c
new file mode 100644
index 00000000000..22c0e4b6bc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantsd-2.c
@@ -0,0 +1,103 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
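+/* Scalar reference for vgetmantsd: rebuild the mantissa of SOURCE into
+   the interval selected by INTERV, with the sign controlled by SIGNCTRL.  */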
+double
+get_norm_mant (double source, int signctrl, int interv)
+{
+ long long dest, src, sign, exp, fraction;
+ src = *(long long *) &source;
+ sign = (signctrl & 0x1) ? 0 : (src >> 63);
+ exp = (src & 0x7ff0000000000000) >> 52;
+ fraction = (src & 0xfffffffffffff);
+
+ if (isnan (source))
+ return signbit (source) ? -NAN : NAN;
+ if (source == 0.0 || source == -0.0 || isinf (source))
+ return sign ? -1.0 : 1.0;
+ if (signbit (source) && (signctrl & 0x2))
+ return -NAN;
+ if (!isnormal (source))
+ {
+ src = (src & 0xfff7ffffffffffff);
+ exp = 0x3ff;
+ while (!(src & 0x8000000000000))
+ {
+ src += fraction & 0x8000000000000;
+ fraction = fraction << 1;
+ exp--;
+ }
+ }
+
+ switch (interv)
+ {
+ case 0:
+ exp = 0x3ff;
+ break;
+ case 1:
+ exp = ((exp - 0x3ff) & 0x1) ? 0x3fe : 0x3ff;
+ break;
+ case 2:
+ exp = 0x3fe;
+ break;
+ case 3:
+ exp = (fraction & 0x8000000000000) ? 0x3fe : 0x3ff;
+ break;
+ default:
+ abort ();
+ }
+
+ dest = (sign << 63) | (exp << 52) | fraction;
+ return *(double *) &dest;
+}
+
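+/* The scalar form writes the normalized mantissa of the low element of the
+   second source and copies the upper element from the first source.  */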
+static void
+compute_vgetmantsd (double *r, double *s1, double *s2, int interv,
+ int signctrl)
+{
+ r[0] = get_norm_mant (s2[0], signctrl, interv);
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+ int interv = _MM_MANT_NORM_p5_1;
+ int signctrl = _MM_MANT_SIGN_src;
+
+ src1.x = _mm_set_pd (-3.0, 111.111);
+ src2.x = _mm_set_pd (222.222, -2.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_getmant_sd (src1.x, src2.x, interv, signctrl);
+ res2.x =
+ _mm_mask_getmant_sd (res2.x, mask, src1.x, src2.x, interv,
+ signctrl);
+ res3.x =
+ _mm_maskz_getmant_sd (mask, src1.x, src2.x, interv, signctrl);
+
+ compute_vgetmantsd (res_ref, src1.a, src2.a, interv, signctrl);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c
new file mode 100644
index 00000000000..c088c02f6fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_getmant_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x =
+ _mm_mask_getmant_ss (x, m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x =
+ _mm_maskz_getmant_ss (m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm_getmant_round_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
+ _MM_FROUND_NO_EXC);
+ x =
+ _mm_mask_getmant_round_ss (x, m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+ x =
+ _mm_maskz_getmant_round_ss (m, y, z, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c
new file mode 100644
index 00000000000..25d186d9329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vgetmantss-2.c
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+#include <math.h>
+
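+/* Scalar reference for vgetmantss: rebuild the mantissa of SOURCE into
+   the interval selected by INTERV, with the sign controlled by SIGNCTRL.  */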
+float
+get_norm_mant (float source, int signctrl, int interv)
+{
+ int dest, src, sign, exp, fraction;
+ src = *(int *) &source;
+ sign = (signctrl & 0x1) ? 0 : (src >> 31);
+ exp = (src & 0x7f800000) >> 23;
+ fraction = (src & 0x7fffff);
+
+ if (isnan (source))
+ return signbit (source) ? -NAN : NAN;
+ if (source == 0.0 || source == -0.0 || isinf (source))
+ return sign ? -1.0 : 1.0;
+ if (signbit (source) && (signctrl & 0x2))
+ return -NAN;
+ if (!isnormal (source))
+ {
+ src = (src & 0xffbfffff);
+ exp = 0x7f;
+ while (!(src & 0x400000))
+ {
+ src += fraction & 0x400000;
+ fraction = fraction << 1;
+ exp--;
+ }
+ }
+
+ switch (interv)
+ {
+ case 0:
+ exp = 0x7f;
+ break;
+ case 1:
+ exp = ((exp - 0x7f) & 0x1) ? 0x7e : 0x7f;
+ break;
+ case 2:
+ exp = 0x7e;
+ break;
+ case 3:
+ exp = (fraction & 0x400000) ? 0x7e : 0x7f;
+ break;
+ default:
+ abort ();
+ }
+
+ dest = (sign << 31) | (exp << 23) | fraction;
+ return *(float *) &dest;
+}
+
+static void
+compute_vgetmantss (float *r, float *s1, float *s2, int interv,
+ int signctrl)
+{
+ int i;
+ r[0] = get_norm_mant (s2[0], signctrl, interv);
+ for (i = 1; i < 4; i++)
+ {
+ r[i] = s1[i];
+ }
+}
+
+static void
+avx512f_test (void)
+{
+ union128 res1, res2, res3, src1, src2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+ int interv = _MM_MANT_NORM_p5_1;
+ int signctrl = _MM_MANT_SIGN_src;
+
+ src1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
+ src2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_getmant_ss (src1.x, src2.x, interv, signctrl);
+ res2.x =
+ _mm_mask_getmant_ss (res2.x, mask, src1.x, src2.x, interv,
+ signctrl);
+ res3.x =
+ _mm_maskz_getmant_ss (mask, src1.x, src2.x, interv, signctrl);
+
+ compute_vgetmantss (res_ref, src1.a, src2.a, interv, signctrl);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, 1);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-1.c
new file mode 100644
index 00000000000..b2caa53246c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*zmm" 3 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+__m128 y;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_insertf32x4 (x, y, 1);
+ x = _mm512_maskz_insertf32x4 (6, x, y, 1);
+ x = _mm512_mask_insertf32x4 (x, 2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-2.c
new file mode 100644
index 00000000000..86d253bb6ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (float));
+ memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+ union128 s2;
+ float res_ref[SIZE];
+ int j;
+
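+  /* An arbitrary mask value that varies with the vector length.  */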
+ MASK_TYPE mask = 6 ^ (0xffd >> SIZE);
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 10.2;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j / 2.03;
+
+ res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-1.c
new file mode 100644
index 00000000000..a4c74fd4863
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vinsertf64x4\[ \\t\]+\[^\n\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vinsertf64x4\[ \\t\]+\[^\n\]+\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertf64x4\[ \\t\]+\[^\n\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256d y;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_insertf64x4 (x, y, 1);
+ x = _mm512_mask_insertf64x4 (x, 2, x, y, 1);
+ x = _mm512_maskz_insertf64x4 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-2.c
new file mode 100644
index 00000000000..17871b85493
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinsertf64x4-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#define SIZE (512 / 64)
+#include "avx512f-mask-type.h"
+#include <string.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+void static
+avx512f_test (void)
+{
+ union512d s1, res, res2, res3;
+ union256d s2;
+ double res_ref[8];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * j + 1.6;
+ res2.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j / 2.7;
+
+ res.x = _mm512_insertf64x4 (s1.x, s2.x, 0);
+ res2.x = _mm512_mask_insertf64x4 (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = _mm512_maskz_insertf64x4 (mask, s1.x, s2.x, 0);
+
+ memcpy (res_ref, s1.a, 64);
+ memcpy (res_ref, s2.a, 32);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (check_union512d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (check_union512d (res3, res_ref))
+ abort ();
+
+ res.x = _mm512_insertf64x4 (s1.x, s2.x, 1);
+ res2.x = _mm512_mask_insertf64x4 (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = _mm512_maskz_insertf64x4 (mask, s1.x, s2.x, 1);
+
+ memcpy (res_ref, s1.a, 64);
+ memcpy (res_ref + 4, s2.a, 32);
+
+ if (check_union512d (res, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (check_union512d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (check_union512d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-1.c
new file mode 100644
index 00000000000..44c083137a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*xmm\[^\n\]*zmm\[^\n\]*zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x,a;
+volatile __m128i y;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_maskz_inserti32x4 (6, x, y, 1);
+ x = _mm512_mask_inserti32x4 (a, 6, x, y, 1);
+ x = _mm512_inserti32x4 (x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-2.c
new file mode 100644
index 00000000000..0bfc0579779
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void static
+CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (int));
+ memcpy (res_ref + imm * 4, s2.a, 16);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+ union128i_d s2;
+ int res_ref[SIZE];
+ int j;
+
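+  /* An arbitrary mask value that varies with the vector length.  */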
+ MASK_TYPE mask = 6 ^ (0xffd >> SIZE);
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j;
+
+ res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-1.c
new file mode 100644
index 00000000000..f5b7eff096d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[ \\t\]+\[^\n\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[ \\t\]+\[^\n\]+\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[ \\t\]+\[^\n\]+\[^\n\]" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_inserti64x4 (x, y, 1);
+ x = _mm512_mask_inserti64x4 (x, 2, x, y, 1);
+ x = _mm512_maskz_inserti64x4 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-2.c
new file mode 100644
index 00000000000..58993ad5ed0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vinserti64x4-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#define SIZE (512 / 64)
+#include "avx512f-mask-type.h"
+#include <string.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+void static
+avx512f_test (void)
+{
+ union512i_q s1, res, res2, res3;
+ union256i_q s2;
+ long long int res_ref[8];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * j;
+ res2.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 4; j++)
+ s2.a[j] = j * j * j;
+
+ res.x = _mm512_inserti64x4 (s1.x, s2.x, 0);
+ res2.x = _mm512_mask_inserti64x4 (res2.x, mask, s1.x, s2.x, 0);
+ res3.x = _mm512_maskz_inserti64x4 (mask, s1.x, s2.x, 0);
+
+ memcpy (res_ref, s1.a, 64);
+ memcpy (res_ref, s2.a, 32);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (check_union512i_q (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (check_union512i_q (res3, res_ref))
+ abort ();
+
+ res.x = _mm512_inserti64x4 (s1.x, s2.x, 1);
+ res2.x = _mm512_mask_inserti64x4 (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = _mm512_maskz_inserti64x4 (mask, s1.x, s2.x, 1);
+
+ memcpy (res_ref, s1.a, 64);
+ memcpy (res_ref + 4, s2.a, 32);
+
+ if (check_union512i_q (res, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (check_union512i_q (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (check_union512i_q (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-1.c
new file mode 100644
index 00000000000..085a7e5e0c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_pd (x, x);
+ x = _mm512_mask_max_pd (x, m, x, x);
+ x = _mm512_maskz_max_pd (m, x, x);
+ x = _mm512_max_round_pd (x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_max_round_pd (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_max_round_pd (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-2.c
new file mode 100644
index 00000000000..9a683b913eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] > s2[i] ? s1[i] : s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_max_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-1.c
new file mode 100644
index 00000000000..564eeb516de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_ps (x, x);
+ x = _mm512_mask_max_ps (x, m, x, x);
+ x = _mm512_maskz_max_ps (m, x, x);
+ x = _mm512_max_round_ps (x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_max_round_ps (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_max_round_ps (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-2.c
new file mode 100644
index 00000000000..a49967ccb7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] > s2[i] ? s1[i] : s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_max_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
new file mode 100644
index 00000000000..39e968b4efb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_max_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_max_sd (m, x1, x2);
+ x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_max_round_sd (x1, m, x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_max_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c
new file mode 100644
index 00000000000..dfb1063b521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_maxsd (double *src1, double *src2, double *dst)
+{
+  dst[0] = src1[0] > src2[0] ? src1[0] : src2[0];
+ dst[1] = src1[1];
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128d src1, src2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ src1.a[i] = 1. / (i + 1);
+ src2.a[i] = i;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_max_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_maskz_max_sd (mask, src1.x, src2.x);
+
+ compute_maxsd (src1.a, src2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
new file mode 100644
index 00000000000..386f6ac699f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_max_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_max_ss (m, x1, x2);
+ x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_max_round_ss (x1, m, x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_max_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c
new file mode 100644
index 00000000000..c3658e0a243
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_maxss (float *src1, float *src2, float *dst)
+{
+  int i;
+  dst[0] = src1[0] > src2[0] ? src1[0] : src2[0];
+ for (i = 1; i < 4; i++)
+ dst[i] = src1[i];
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128 src1, src2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ src1.a[i] = 1. / (i + 1);
+ src2.a[i] = i;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_max_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_maskz_max_ss (mask, src1.x, src2.x);
+
+ compute_maxss (src1.a, src2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminpd-1.c
new file mode 100644
index 00000000000..a4c993e6431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_pd (x, x);
+ x = _mm512_mask_min_pd (x, m, x, x);
+ x = _mm512_maskz_min_pd (m, x, x);
+ x = _mm512_min_round_pd (x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_min_round_pd (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_min_round_pd (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminpd-2.c
new file mode 100644
index 00000000000..982dff4dc4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] < s2[i] ? s1[i] : s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_min_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminps-1.c
new file mode 100644
index 00000000000..3cd5904bcc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_ps (x, x);
+ x = _mm512_mask_min_ps (x, m, x, x);
+ x = _mm512_maskz_min_ps (m, x, x);
+ x = _mm512_min_round_ps (x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_min_round_ps (x, m, x, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_min_round_ps (m, x, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminps-2.c
new file mode 100644
index 00000000000..b7f668201ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] < s2[i] ? s1[i] : s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_min_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
new file mode 100644
index 00000000000..e934e01767e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_min_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_min_sd (m, x1, x2);
+ x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_min_round_sd (x1, m, x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_min_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c
new file mode 100644
index 00000000000..5dac6286cba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_minsd (double *src1, double *src2, double *dst)
+{
+  dst[0] = src1[0] < src2[0] ? src1[0] : src2[0];
+ dst[1] = src1[1];
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128d src1, src2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ src1.a[i] = 1. / (i + 1);
+ src2.a[i] = i;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_min_sd (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_maskz_min_sd (mask, src1.x, src2.x);
+
+ compute_minsd (src1.a, src2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
new file mode 100644
index 00000000000..a02ea951668
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_min_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_min_ss (m, x1, x2);
+ x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_min_round_ss (x1, m, x1, x2, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_min_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
new file mode 100644
index 00000000000..aab16dd970d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_minss (float *src1, float *src2, float *dst)
+{
+  int i;
+  dst[0] = src1[0] < src2[0] ? src1[0] : src2[0];
+ for (i = 1; i < 4; i++)
+ dst[i] = src1[i];
+}
+
+void static
+avx512f_test (void)
+{
+ int i;
+ union128 src1, src2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ for (i = 0; i < 4; i++)
+ {
+ src1.a[i] = 1. / (i + 1);
+ src2.a[i] = i;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_min_ss (res1.x, mask, src1.x, src2.x);
+ res2.x = _mm_maskz_min_ss (mask, src1.x, src2.x);
+
+ compute_minss (src1.a, src2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
new file mode 100644
index 00000000000..9cae38ff3fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m512d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_mask_mov_pd (x1, m, x2);
+ x1 = _mm512_maskz_mov_pd (m, x2);
+
+ x1 = _mm512_load_pd (p);
+ x1 = _mm512_mask_load_pd (x1, m, p);
+ x1 = _mm512_maskz_load_pd (m, p);
+
+ _mm512_store_pd (p, x1);
+ _mm512_mask_store_pd (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-2.c
new file mode 100644
index 00000000000..fec0e763889
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovapd-2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define ALIGN ((AVX512F_LEN) / 8)
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s2, s3, res1, res3, res4, res5, res6;
+ MASK_TYPE mask = MASK_VALUE;
+ double s1[SIZE] __attribute__ ((aligned (ALIGN)));
+ double res2[SIZE] __attribute__ ((aligned (ALIGN)));
+ double res7[SIZE] __attribute__ ((aligned (ALIGN)));
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 12.34 * (i + 2000) * sign;
+ s2.a[i] = 56.78 * (i - 30) * sign;
+ s3.a[i] = 90.12 * (i + 40) * sign;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ res7[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
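+  /* The plain load/store intrinsics are only covered when this file is
+     built as the 512-bit test; the masked forms run at every length.  */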
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_load_pd) (s1);
+ INTRINSIC (_store_pd) (res2, s2.x);
+#endif
+ res3.x = INTRINSIC (_mask_mov_pd) (res3.x, mask, s3.x);
+ res4.x = INTRINSIC (_maskz_mov_pd) (mask, s3.x);
+ res5.x = INTRINSIC (_mask_load_pd) (res5.x, mask, s1);
+ res6.x = INTRINSIC (_maskz_load_pd) (mask, s1);
+ INTRINSIC (_mask_store_pd) (res7, mask, s2.x);
+
+#if AVX512F_LEN == 512
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, d) (s2, res2))
+ abort ();
+#endif
+
+ MASK_MERGE (d) (s3.a, mask, SIZE);
+ if (checkVd (res3.a, s3.a, SIZE))
+ abort ();
+
+ MASK_ZERO (d) (s3.a, mask, SIZE);
+ if (checkVd (res4.a, s3.a, SIZE))
+ abort ();
+
+ MASK_MERGE (d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res5, s1))
+ abort ();
+
+ MASK_ZERO (d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res6, s1))
+ abort ();
+
+ MASK_MERGE (d) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (s2, res7))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
new file mode 100644
index 00000000000..217e29ccb38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m512 x1, x2;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_mask_mov_ps (x1, m, x2);
+ x1 = _mm512_maskz_mov_ps (m, x2);
+
+ x1 = _mm512_load_ps (p);
+ x1 = _mm512_mask_load_ps (x1, m, p);
+ x1 = _mm512_maskz_load_ps (m, p);
+
+ _mm512_store_ps (p, x1);
+ _mm512_mask_store_ps (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-2.c
new file mode 100644
index 00000000000..ddb9ca74041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovaps-2.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 32)
+#include "avx512f-mask-type.h"
+#define ALIGN ((AVX512F_LEN) / 8)
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s2, s3, res1, res3, res4, res5, res6;
+ MASK_TYPE mask = MASK_VALUE;
+ float s1[SIZE] __attribute__ ((aligned (ALIGN)));
+ float res2[SIZE] __attribute__ ((aligned (ALIGN)));
+ float res7[SIZE] __attribute__ ((aligned (ALIGN)));
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 12.34 * (i + 2000) * sign;
+ s2.a[i] = 56.78 * (i - 30) * sign;
+ s3.a[i] = 90.12 * (i + 40) * sign;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ res7[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_load_ps) (s1);
+ INTRINSIC (_store_ps) (res2, s2.x);
+#endif
+ res3.x = INTRINSIC (_mask_mov_ps) (res3.x, mask, s3.x);
+ res4.x = INTRINSIC (_maskz_mov_ps) (mask, s3.x);
+ res5.x = INTRINSIC (_mask_load_ps) (res5.x, mask, s1);
+ res6.x = INTRINSIC (_maskz_load_ps) (mask, s1);
+ INTRINSIC (_mask_store_ps) (res7, mask, s2.x);
+
+#if AVX512F_LEN == 512
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, ) (s2, res2))
+ abort ();
+#endif
+
+ MASK_MERGE () (s3.a, mask, SIZE);
+ if (checkVf (res3.a, s3.a, SIZE))
+ abort ();
+
+ MASK_ZERO () (s3.a, mask, SIZE);
+ if (checkVf (res4.a, s3.a, SIZE))
+ abort ();
+
+ MASK_MERGE () (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res5, s1))
+ abort ();
+
+ MASK_ZERO () (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res6, s1))
+ abort ();
+
+ MASK_MERGE () (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (s2, res7))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-1.c
new file mode 100644
index 00000000000..ccaa078ef74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1, x2;
+volatile __mmask8 m8;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_movedup_pd (x2);
+ x1 = _mm512_mask_movedup_pd (x1, m8, x2);
+ x1 = _mm512_maskz_movedup_pd (m8, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-2.c
new file mode 100644
index 00000000000..02f7b8cd7f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovddup-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (double *s, double *r)
+{
+ int i;
+
+  for (i = 0; i < SIZE / 2; i++)
+ {
+ r[2 * i] = s[2 * i];
+ r[2 * i + 1] = s[2 * i];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 123.2 + 32.6;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_movedup_pd) (s.x);
+ res2.x = INTRINSIC (_mask_movedup_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_movedup_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
new file mode 100644
index 00000000000..1bfd2a591b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m512i x1, x2;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_mask_mov_epi32 (x1, m, x2);
+ x1 = _mm512_maskz_mov_epi32 (m, x2);
+
+ x1 = _mm512_load_si512 (p);
+ x1 = _mm512_load_epi32 (p);
+ x1 = _mm512_mask_load_epi32 (x1, m, p);
+ x1 = _mm512_maskz_load_epi32 (m, p);
+
+ _mm512_store_si512 (p, x1);
+ _mm512_store_epi32 (p, x1);
+ _mm512_mask_store_epi32 (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-2.c
new file mode 100644
index 00000000000..d6ad98c9fe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-2.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 32)
+#include "avx512f-mask-type.h"
+#define ALIGN ((AVX512F_LEN) / 8)
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s2, s3, res1, res2, res5, res6, res7, res8;
+ MASK_TYPE mask = MASK_VALUE;
+ int s1[SIZE] __attribute__ ((aligned (ALIGN)));
+ int res3[SIZE] __attribute__ ((aligned (ALIGN)));
+ int res4[SIZE] __attribute__ ((aligned (ALIGN)));
+ int res9[SIZE] __attribute__ ((aligned (ALIGN)));
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 1234 * (i + 2000) * sign;
+ s2.a[i] = 5678 * (i - 30) * sign;
+ s3.a[i] = 9012 * (i + 40) * sign;
+ res5.a[i] = DEFAULT_VALUE;
+ res7.a[i] = DEFAULT_VALUE;
+ res9[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
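+  /* The unmasked load/store intrinsics (_si512 and _epi32 forms) are only
+     covered by the 512-bit instance of this test.  */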
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_load_si512) (s1);
+ res2.x = INTRINSIC (_load_epi32) (s1);
+ INTRINSIC (_store_si512) (res3, s2.x);
+ INTRINSIC (_store_epi32) (res4, s2.x);
+#endif
+ res5.x = INTRINSIC (_mask_mov_epi32) (res5.x, mask, s3.x);
+ res6.x = INTRINSIC (_maskz_mov_epi32) (mask, s3.x);
+ res7.x = INTRINSIC (_mask_load_epi32) (res7.x, mask, s1);
+ res8.x = INTRINSIC (_maskz_load_epi32) (mask, s1);
+ INTRINSIC (_mask_store_epi32) (res9, mask, s2.x);
+
+#if AVX512F_LEN == 512
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (s2, res3))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (s2, res4))
+ abort ();
+#endif
+
+ MASK_MERGE (i_d) (s3.a, mask, SIZE);
+ if (checkVi (res5.a, s3.a, SIZE))
+ abort ();
+
+ MASK_ZERO (i_d) (s3.a, mask, SIZE);
+ if (checkVi (res6.a, s3.a, SIZE))
+ abort ();
+
+ MASK_MERGE (i_d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res7, s1))
+ abort ();
+
+ MASK_ZERO (i_d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res8, s1))
+ abort ();
+
+ MASK_MERGE (i_d) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (s2, res9))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
new file mode 100644
index 00000000000..81f958adb77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m512i x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm512_mask_mov_epi64 (x1, m, x2);
+ x1 = _mm512_maskz_mov_epi64 (m, x2);
+
+ x1 = _mm512_load_epi64 (p);
+ x1 = _mm512_mask_load_epi64 (x1, m, p);
+ x1 = _mm512_maskz_load_epi64 (m, p);
+
+ _mm512_store_epi64 (p, x1);
+ _mm512_mask_store_epi64 (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-2.c
new file mode 100644
index 00000000000..6388348f8c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-2.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+#define ALIGN ((AVX512F_LEN) / 8)
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s2, s3, res1, res3, res4, res5, res6;
+ MASK_TYPE mask = MASK_VALUE;
+ long long s1[SIZE] __attribute__ ((aligned (ALIGN)));
+ long long res2[SIZE] __attribute__ ((aligned (ALIGN)));
+ long long res7[SIZE] __attribute__ ((aligned (ALIGN)));
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 1234 * (i + 2000) * sign;
+ s2.a[i] = 5678 * (i - 30) * sign;
+ s3.a[i] = 9012 * (i + 40) * sign;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ res7[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = INTRINSIC (_load_epi64) (s1);
+ INTRINSIC (_store_epi64) (res2, s2.x);
+#endif
+ res3.x = INTRINSIC (_mask_mov_epi64) (res3.x, mask, s3.x);
+ res4.x = INTRINSIC (_maskz_mov_epi64) (mask, s3.x);
+ res5.x = INTRINSIC (_mask_load_epi64) (res5.x, mask, s1);
+ res6.x = INTRINSIC (_maskz_load_epi64) (mask, s1);
+ INTRINSIC (_mask_store_epi64) (res7, mask, s2.x);
+
+#if AVX512F_LEN == 512
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (s2, res2))
+ abort ();
+#endif
+
+ MASK_MERGE (i_q) (s3.a, mask, SIZE);
+ if (checkVl (res3.a, s3.a, SIZE))
+ abort ();
+
+ MASK_ZERO (i_q) (s3.a, mask, SIZE);
+ if (checkVl (res4.a, s3.a, SIZE))
+ abort ();
+
+ MASK_MERGE (i_q) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res5, s1))
+ abort ();
+
+ MASK_ZERO (i_q) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res6, s1))
+ abort ();
+
+ MASK_MERGE (i_q) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (s2, res7))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
new file mode 100644
index 00000000000..b8af781834e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_loadu_si512 (p);
+ x = _mm512_mask_loadu_epi32 (x, m, p);
+ x = _mm512_maskz_loadu_epi32 (m, p);
+
+ _mm512_storeu_si512 (p, x);
+ _mm512_mask_storeu_epi32 (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
new file mode 100644
index 00000000000..3a3851fae54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 32)
+#include "avx512f-mask-type.h"
+
+typedef struct
+{
+ char c;
+ int a[SIZE];
+} __attribute__ ((packed)) EVAL(unaligned_array, AVX512F_LEN,);
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s2, res1, res3, res4;
+ EVAL(unaligned_array, AVX512F_LEN,) s1, res2, res5;
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12345 * (i + 2000) * sign;
+ s2.a[i] = 67890 * (i + 2000) * sign;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+#if AVX512F_LEN == 512
+ res1.x = _mm512_loadu_si512 (s1.a);
+ _mm512_storeu_si512 (res2.a, s2.x);
+#endif
+ res3.x = INTRINSIC (_mask_loadu_epi32) (res3.x, mask, s1.a);
+ res4.x = INTRINSIC (_maskz_loadu_epi32) (mask, s1.a);
+ INTRINSIC (_mask_storeu_epi32) (res5.a, mask, s2.x);
+
+#if AVX512F_LEN == 512
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, s1.a))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (s2, res2.a))
+ abort ();
+#endif
+
+ MASK_MERGE (i_d) (s1.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, s1.a))
+ abort ();
+
+ MASK_ZERO (i_d) (s1.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, s1.a))
+ abort ();
+
+ MASK_MERGE (i_d) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (s2, res5.a))
+ abort ();
+}
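
The packed struct in the test above is how a guaranteed-misaligned buffer is manufactured: the leading char places the array at offset 1, so the compiler cannot use an aligned vmovdqa32 access and must emit the unaligned form under test. A minimal sketch of the same trick, independent of the testsuite headers (in practice the enclosing object still starts at an aligned address, leaving a[] off by one):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* The leading char forces a[] to offset 1 of the packed struct,
   so its address is misaligned for any vector access.  */
struct unaligned
{
  char c;
  int a[16];
} __attribute__ ((packed));

int
main (void)
{
  struct unaligned u;
  printf ("offsetof (a) = %zu, address %% 64 = %zu\n",
          offsetof (struct unaligned, a),
          (size_t) ((uintptr_t) u.a % 64));
  return 0;
}
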
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-1.c
new file mode 100644
index 00000000000..806d1f5867a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_loadu_epi64 (x, m, p);
+ x = _mm512_maskz_loadu_epi64 (m, p);
+
+ _mm512_mask_storeu_epi64 (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-2.c
new file mode 100644
index 00000000000..d73499ee278
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu64-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 64)
+#include "avx512f-mask-type.h"
+
+typedef struct
+{
+ char c;
+ long long a[SIZE];
+} __attribute__ ((packed)) EVAL(unaligned_array, AVX512F_LEN,);
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s2, res1, res2;
+ EVAL(unaligned_array, AVX512F_LEN,) s1, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12345 * (i + 2000) * sign;
+ s2.a[i] = 67890 * (i + 2000) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_loadu_epi64) (res1.x, mask, s1.a);
+ res2.x = INTRINSIC (_maskz_loadu_epi64) (mask, s1.a);
+ INTRINSIC (_mask_storeu_epi64) (res3.a, mask, s2.x);
+
+ MASK_MERGE (i_q) (s1.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, s1.a))
+ abort ();
+
+ MASK_ZERO (i_q) (s1.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, s1.a))
+ abort ();
+
+ MASK_MERGE (i_q) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (s2, res3.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-1.c
new file mode 100644
index 00000000000..7a3ba47b1cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vmovntdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+__m512i *x;
+volatile __m512i y;
+
+void extern
+avx512f_test (void)
+{
+ _mm512_stream_si512 (x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-2.c
new file mode 100644
index 00000000000..7b200e37d15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntdq-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union512i_q s, res;
+
+ s.x = _mm512_set_epi64 (39578, -429496, 7856, 0, 85632, -1234, 47563, -1);
+ _mm512_stream_si512 (&res.x, s.x);
+
+ if (check_union512i_q (s, res.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-1.c
new file mode 100644
index 00000000000..a02162124b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vmovntpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+double *x;
+volatile __m512d y;
+
+void extern
+avx512f_test (void)
+{
+ _mm512_stream_pd (x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-2.c
new file mode 100644
index 00000000000..96c26c21ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntpd-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union512d s;
+ double res[8] __attribute__ ((aligned (64)));
+
+ s.x = _mm512_set_pd (-39578.467285, 4294967295.1, -7856.342941, 0,
+ 85632.783567, 1234.9999, 47563.234215, -1.07);
+ _mm512_stream_pd (res, s.x);
+
+ if (check_union512d (s, res))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-1.c
new file mode 100644
index 00000000000..933f01518aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vmovntps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+float *x;
+volatile __m512 y;
+
+void extern
+avx512f_test (void)
+{
+ _mm512_stream_ps (x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-2.c
new file mode 100644
index 00000000000..9f4c7cb5ab2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovntps-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+void static
+avx512f_test (void)
+{
+ union512 s;
+ float res[16] __attribute__ ((aligned (64)));
+
+ s.x = _mm512_set_ps (-39578.467285, 4294967295.1, -7856.342941, 0,
+ 85632.783567, 1234.9999, 47563.234215, -1.07,
+ 3453.65743, -1234.9999, 67.234, -1,
+ 0.336624, 34534543, 4345.234234, -1.07234234);
+
+ _mm512_stream_ps (res, s.x);
+
+ if (check_union512 (s, res))
+ abort ();
+}
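
Unlike the vmovdqu/vmovupd/vmovups forms, the non-temporal vmovnt stores fault on a misaligned destination, which is why the buffers in the two tests above are 64-byte aligned (the unions through their __m512 member, the plain arrays through an explicit attribute). When such a buffer must come from the heap instead, C11 aligned_alloc is one option, as in this sketch; it has to be built with -mavx512f and, like the tests, run only on AVX512F hardware, and the size passed to aligned_alloc must be a multiple of the alignment:

#include <stdlib.h>
#include <immintrin.h>

int
main (void)
{
  /* _mm512_stream_ps requires a 64-byte-aligned destination.  */
  float *res = aligned_alloc (64, 16 * sizeof (float));
  if (res == NULL)
    return 1;

  _mm512_stream_ps (res, _mm512_set1_ps (1.5f));
  _mm_sfence ();  /* order the non-temporal store before later reads */

  free (res);
  return 0;
}
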
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c
new file mode 100644
index 00000000000..d9dae843096
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_move_sd (x1, m, x2, x3);
+ x1 = _mm_maskz_move_sd (m, x2, x3);
+
+ x1 = _mm_mask_load_sd (x1, m, p);
+ x1 = _mm_maskz_load_sd (m, p);
+
+ _mm_mask_store_sd (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c
new file mode 100644
index 00000000000..9f9772f8762
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovsd-2.c
@@ -0,0 +1,86 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_movesd (double *s1, double *s2, double *r)
+{
+ r[0] = s2[0];
+ r[1] = s1[1];
+}
+
+static void
+compute_loadsd (double *s, double *r)
+{
+ r[0] = s[0];
+ r[1] = 0.0;
+}
+
+static void
+compute_storesd (double *res, double *s, double *r)
+{
+ r[0] = s[0];
+ r[1] = res[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, s4, res1, res2, res3, res4;
+ __mmask8 mask = MASK_VALUE;
+ double s3[SIZE];
+ double res5[SIZE] = { 0.0 };
+ double res_ref1[SIZE];
+ double res_ref2[SIZE];
+ double res_ref3[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12.34 * (i + 1) * sign;
+ s2.a[i] = 56.78 * (i + 2) * sign;
+ s3[i] = 9000.12 * (i + 3) * sign;
+ s4.a[i] = 34.56 * (i + 4) * sign;
+ sign = -sign;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+ res3.a[0] = DEFAULT_VALUE;
+ res5[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_move_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_move_sd (mask, s1.x, s2.x);
+ res3.x = _mm_mask_load_sd (res3.x, mask, s3);
+ res4.x = _mm_maskz_load_sd (mask, s3);
+ _mm_mask_store_sd (res5, mask, s4.x);
+
+ compute_movesd (s1.a, s2.a, res_ref1);
+ compute_loadsd (s3, res_ref2);
+ compute_storesd (res5, s4.a, res_ref3);
+
+ MASK_MERGE (d) (res_ref1, mask, 1);
+ if (check_union128d (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO (d) (res_ref1, mask, 1);
+ if (check_union128d (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE (d) (res_ref2, mask, 1);
+ if (checkVd (res3.a, res_ref2, SIZE))
+ abort ();
+
+ MASK_ZERO (d) (res_ref2, mask, 1);
+ if (checkVd (res4.a, res_ref2, SIZE))
+ abort ();
+
+ MASK_MERGE (d) (res_ref3, mask, 1);
+ if (checkVd (res_ref3, res5, SIZE))
+ abort ();
+}
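
In the scalar vmovsd form only bit 0 of the mask participates: element 0 is merged or zeroed, while the upper element always comes from the first source operand. That is exactly what the compute_* reference functions above encode, and why MASK_MERGE and MASK_ZERO are applied with a count of 1. A sketch of the masked scalar move, assuming -mavx512f:

#include <immintrin.h>

/* result[0] = (m & 1) ? b[0] : dst[0]; result[1] = a[1].
   Bits 1..7 of the mask are ignored.  */
__m128d
masked_move_low (__m128d dst, __mmask8 m, __m128d a, __m128d b)
{
  return _mm_mask_move_sd (dst, m, a, b);
}
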
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-1.c
new file mode 100644
index 00000000000..b23df0a00ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_movehdup_ps (x);
+ x = _mm512_mask_movehdup_ps (x, m, x);
+ x = _mm512_maskz_movehdup_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-2.c
new file mode 100644
index 00000000000..5fb52302673
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovshdup-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 1; i < SIZE; i += 2)
+ {
+ r[i - 1] = r[i] = s[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 123.2 + 32.6;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_movehdup_ps) (s.x);
+ res2.x = INTRINSIC (_mask_movehdup_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_movehdup_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-1.c
new file mode 100644
index 00000000000..f2fd4e07d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_moveldup_ps (x);
+ x = _mm512_mask_moveldup_ps (x, m, x);
+ x = _mm512_maskz_moveldup_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-2.c
new file mode 100644
index 00000000000..1a6a3fd979a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovsldup-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i += 2)
+ {
+ r[i] = r[i + 1] = s[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 123.2 + 32.6;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_moveldup_ps) (s.x);
+ res2.x = INTRINSIC (_mask_moveldup_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_moveldup_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c
new file mode 100644
index 00000000000..1e8cf37fcd3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovss-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_move_ss (x1, m, x2, x3);
+ x1 = _mm_maskz_move_ss (m, x2, x3);
+
+ x1 = _mm_mask_load_ss (x1, m, p);
+ x1 = _mm_maskz_load_ss (m, p);
+
+ _mm_mask_store_ss (p, m, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c
new file mode 100644
index 00000000000..a7971500d5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovss-2.c
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_movess (float *s1, float *s2, float *r)
+{
+ r[0] = s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+compute_loadss (float *s, float *r)
+{
+ r[0] = s[0];
+ r[1] = r[2] = r[3] = 0.0;
+}
+
+static void
+compute_storess (float *res, float *s, float *r)
+{
+ r[0] = s[0];
+ r[1] = res[1];
+ r[2] = res[2];
+ r[3] = res[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, s4, res1, res2, res3, res4;
+ __mmask8 mask = MASK_VALUE;
+ float s3[SIZE];
+ float res5[SIZE] = { 0.0 };
+ float res_ref1[SIZE];
+ float res_ref2[SIZE];
+ float res_ref3[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12.34 * (i + 1) * sign;
+ s2.a[i] = 56.78 * (i + 2) * sign;
+ s3[i] = 9000.12 * (i + 3) * sign;
+ s4.a[i] = 34.56 * (i + 4) * sign;
+ sign = -sign;
+ }
+
+ res1.a[0] = DEFAULT_VALUE;
+ res3.a[0] = DEFAULT_VALUE;
+ res5[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_move_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_move_ss (mask, s1.x, s2.x);
+ res3.x = _mm_mask_load_ss (res3.x, mask, s3);
+ res4.x = _mm_maskz_load_ss (mask, s3);
+ _mm_mask_store_ss (res5, mask, s4.x);
+
+ compute_movess (s1.a, s2.a, res_ref1);
+ compute_loadss (s3, res_ref2);
+ compute_storess (res5, s4.a, res_ref3);
+
+ MASK_MERGE () (res_ref1, mask, 1);
+ if (check_union128 (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO () (res_ref1, mask, 1);
+ if (check_union128 (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE () (res_ref2, mask, 1);
+ if (checkVf (res3.a, res_ref2, SIZE))
+ abort ();
+
+ MASK_ZERO () (res_ref2, mask, 1);
+ if (checkVf (res4.a, res_ref2, SIZE))
+ abort ();
+
+ MASK_MERGE () (res_ref3, mask, 1);
+ if (checkVf (res_ref3, res5, SIZE))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-1.c
new file mode 100644
index 00000000000..f505819e3fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_loadu_pd (p);
+ x = _mm512_mask_loadu_pd (x, m, p);
+ x = _mm512_maskz_loadu_pd (m, p);
+
+ _mm512_storeu_pd (p, x);
+ _mm512_mask_storeu_pd (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-2.c
new file mode 100644
index 00000000000..0f2c42162d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovupd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ double s1[SIZE];
+ double res4[SIZE];
+ double res5[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 123.456 * (i + 2000) * sign;
+ s2.a[i] = 789.012 * (i + 3000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ res5[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_loadu_pd) (s1);
+ res2.x = INTRINSIC (_mask_loadu_pd) (res2.x, mask, s1);
+ res3.x = INTRINSIC (_maskz_loadu_pd) (mask, s1);
+ INTRINSIC (_storeu_pd) (res4, s2.x);
+ INTRINSIC (_mask_storeu_pd) (res5, mask, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, s1))
+ abort ();
+
+ MASK_MERGE (d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, s1))
+ abort ();
+
+ MASK_ZERO (d) (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, d) (s2, res4))
+ abort ();
+
+ MASK_MERGE (d) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (s2, res5))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovups-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovups-1.c
new file mode 100644
index 00000000000..93b76876ceb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovups-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_loadu_ps (p);
+ x = _mm512_mask_loadu_ps (x, m, p);
+ x = _mm512_maskz_loadu_ps (m, p);
+
+ _mm512_storeu_ps (p, x);
+ _mm512_mask_storeu_ps (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovups-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovups-2.c
new file mode 100644
index 00000000000..4016bbacac9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovups-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ float s1[SIZE];
+ float res4[SIZE];
+ float res5[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1[i] = 123.456 * (i + 2000) * sign;
+ s2.a[i] = 789.012 * (i + 3000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ res5[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_loadu_ps) (s1);
+ res2.x = INTRINSIC (_mask_loadu_ps) (res2.x, mask, s1);
+ res3.x = INTRINSIC (_maskz_loadu_ps) (mask, s1);
+ INTRINSIC (_storeu_ps) (res4, s2.x);
+ INTRINSIC (_mask_storeu_ps) (res5, mask, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, s1))
+ abort ();
+
+ MASK_MERGE () (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, s1))
+ abort ();
+
+ MASK_ZERO () (s1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, s1))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, ) (s2, res4))
+ abort ();
+
+ MASK_MERGE () (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (s2, res5))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-1.c
new file mode 100644
index 00000000000..fd3e3ac6942
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mul_pd (x, x);
+ x = _mm512_mask_mul_pd (x, m, x, x);
+ x = _mm512_maskz_mul_pd (m, x, x);
+ x = _mm512_mul_round_pd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_mul_round_pd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_mul_round_pd (m, x, x, _MM_FROUND_TO_ZERO);
+}
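
The three _round_ calls above exercise EVEX embedded rounding: the rounding mode is encoded in the instruction itself rather than taken from MXCSR, and that is what the {rn-sae}, {rd-sae} and {rz-sae} scan patterns match (embedded rounding always implies suppress-all-exceptions). A sketch of the mapping, using the standard _MM_FROUND_* constants:

#include <immintrin.h>

/* _MM_FROUND_TO_NEAREST_INT -> {rn-sae}  round to nearest even
   _MM_FROUND_TO_NEG_INF     -> {rd-sae}  round toward -infinity
   _MM_FROUND_TO_POS_INF     -> {ru-sae}  round toward +infinity
   _MM_FROUND_TO_ZERO        -> {rz-sae}  round toward zero  */

__m512d
mul_truncating (__m512d a, __m512d b)
{
  /* One vmulpd with {rz-sae}; MXCSR is neither read nor changed.  */
  return _mm512_mul_round_pd (a, b, _MM_FROUND_TO_ZERO);
}
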
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-2.c
new file mode 100644
index 00000000000..8f235a036ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_mul_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_mul_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_mul_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulps-1.c
new file mode 100644
index 00000000000..e86f8972174
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mul_ps (x, x);
+ x = _mm512_mask_mul_ps (x, m, x, x);
+ x = _mm512_maskz_mul_ps (m, x, x);
+ x = _mm512_mul_round_ps (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_mul_round_ps (x, m, x, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_mul_round_ps (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulps-2.c
new file mode 100644
index 00000000000..5f50698b008
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] * s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_mul_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_mul_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_mul_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
new file mode 100644
index 00000000000..f444f4aa39e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_mul_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_mul_sd (m, x1, x2);
+ x1 = _mm_mul_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_mul_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_mul_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c
new file mode 100644
index 00000000000..f47f62c5596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulsd-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vmulsd (double *s1, double *s2, double *r)
+{
+ r[0] = s1[0] * s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, -4.5);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_mul_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_mul_sd (mask, s1.x, s2.x);
+
+ compute_vmulsd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
new file mode 100644
index 00000000000..ca2953aec9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_mul_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_mul_ss (m, x1, x2);
+ x1 = _mm_mul_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_mul_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_mul_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c
new file mode 100644
index 00000000000..d0520cbeedb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmulss-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vmulss (float *s1, float *s2, float *r)
+{
+ r[0] = s1[0] * s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.22, -333.33, 444.44, -4.56);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_mul_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_mul_ss (mask, s1.x, s2.x);
+
+ compute_vmulss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpabsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpabsd-2.c
new file mode 100644
index 00000000000..b01268f96b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpabsd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *i1, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ if (i1[i] < 0)
+ r[i] = -i1[i];
+ else
+ r[i] = i1[i];
+}
+
+static void
+TEST (void)
+{
+ int ck[SIZE];
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) s, d, dm, dz;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 7 + (i << 15) + 356;
+ d.a[i] = DEFAULT_VALUE;
+ dm.a[i] = DEFAULT_VALUE;
+ dz.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (s.a, ck);
+
+ d.x = INTRINSIC (_abs_epi32) (s.x);
+ dz.x = INTRINSIC (_maskz_abs_epi32) (mask, s.x);
+ dm.x = INTRINSIC (_mask_abs_epi32) (dm.x, mask, s.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (d, ck))
+ abort ();
+
+ MASK_MERGE (i_d) (ck, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (dm, ck))
+ abort ();
+
+ MASK_ZERO (i_d) (ck, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (dz, ck))
+ abort ();
+}
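
One subtlety in the CALC reference above: the generated inputs are all positive, so the negation never sees INT_MIN, for which r[i] = -i1[i] would be undefined behaviour in C; vpabsd itself simply wraps and returns INT_MIN again. An overflow-safe scalar reference would negate through unsigned arithmetic, as in this sketch:

#include <stdio.h>
#include <limits.h>

/* Matches vpabsd for every input, including INT_MIN.  The final
   unsigned-to-int conversion is implementation-defined but wraps
   on GCC.  */
static int
abs_wrap (int x)
{
  return x < 0 ? (int) (0u - (unsigned int) x) : x;
}

int
main (void)
{
  printf ("%d %d\n", abs_wrap (-5), abs_wrap (INT_MIN));
  return 0;
}
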
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpabsd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpabsd512-1.c
new file mode 100644
index 00000000000..67b1def173a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpabsd512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_abs_epi32 (x);
+ x = _mm512_maskz_abs_epi32 (7, x);
+ x = _mm512_mask_abs_epi32 (x, 6, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpabsq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpabsq-2.c
new file mode 100644
index 00000000000..1276b6c2dcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpabsq-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *i1, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ if (i1[i] < 0)
+ r[i] = -i1[i];
+ else
+ r[i] = i1[i];
+}
+
+static void
+TEST (void)
+{
+ long long ck[SIZE];
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) s, d, dm, dz;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 7 + (i << 15) + 356;
+ d.a[i] = DEFAULT_VALUE;
+ dm.a[i] = DEFAULT_VALUE;
+ dz.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (s.a, ck);
+
+ d.x = INTRINSIC (_abs_epi64) (s.x);
+ dz.x = INTRINSIC (_maskz_abs_epi64) (mask, s.x);
+ dm.x = INTRINSIC (_mask_abs_epi64) (dm.x, mask, s.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (d, ck))
+ abort ();
+
+ MASK_MERGE (i_q) (ck, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dm, ck))
+ abort ();
+
+ MASK_ZERO (i_q) (ck, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dz, ck))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpabsq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpabsq512-1.c
new file mode 100644
index 00000000000..fee48b1b740
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpabsq512-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_abs_epi64 (x);
+ x = _mm512_maskz_abs_epi64 (2, x);
+ x = _mm512_mask_abs_epi64 (x, 3, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-1.c
new file mode 100644
index 00000000000..f4bf7eb3f35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_add_epi32 (x, x);
+ x = _mm512_mask_add_epi32 (x, m, x, x);
+ x = _mm512_maskz_add_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-2.c
new file mode 100644
index 00000000000..8a1bc4dda8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpaddd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-1.c
new file mode 100644
index 00000000000..6f8223e1140
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_add_epi64 (x, x);
+ x = _mm512_mask_add_epi64 (x, m, x, x);
+ x = _mm512_maskz_add_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-2.c
new file mode 100644
index 00000000000..4f0b9be00ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpaddq-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-1.c
new file mode 100644
index 00000000000..fbf8a49a376
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_and_si512 (x, x);
+ x = _mm512_and_epi32 (x, x);
+ x = _mm512_mask_and_epi32 (x, m, x, x);
+ x = _mm512_maskz_and_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c
new file mode 100644
index 00000000000..9ec5023942f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] & s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_and_si512) (s1.x, s2.x);
+ res2.x = INTRINSIC (_and_epi32) (s1.x, s2.x);
+ res3.x = INTRINSIC (_mask_and_epi32) (res3.x, mask, s1.x, s2.x);
+ res4.x = INTRINSIC (_maskz_and_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-1.c
new file mode 100644
index 00000000000..8f48d601c30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_andnot_si512 (x, x);
+ x = _mm512_andnot_epi32 (x, x);
+ x = _mm512_mask_andnot_epi32 (x, m, x, x);
+ x = _mm512_maskz_andnot_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c
new file mode 100644
index 00000000000..84c99da56c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (~s1[i]) & s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_andnot_si512) (s1.x, s2.x);
+ res2.x = INTRINSIC (_andnot_epi32) (s1.x, s2.x);
+ res3.x = INTRINSIC (_mask_andnot_epi32) (res3.x, mask, s1.x, s2.x);
+ res4.x = INTRINSIC (_maskz_andnot_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-1.c
new file mode 100644
index 00000000000..348fb159656
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_andnot_epi64 (x, x);
+ x = _mm512_mask_andnot_epi64 (x, m, x, x);
+ x = _mm512_maskz_andnot_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c
new file mode 100644
index 00000000000..e862ff6a922
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (~s1[i]) & s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_andnot_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_andnot_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_andnot_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-1.c
new file mode 100644
index 00000000000..343ff59f579
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_and_epi64 (x, x);
+ x = _mm512_mask_and_epi64 (x, m, x, x);
+ x = _mm512_maskz_and_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c
new file mode 100644
index 00000000000..2d51edf56a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] & s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_and_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_and_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_and_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-1.c
new file mode 100644
index 00000000000..3a0aa2429a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "(vpblendmd|vmovdqa32)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_blend_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-2.c
new file mode 100644
index 00000000000..8764c4d1d83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmd-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1LL << i)) ? s2[i] : s1[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 15 + 3467 * i * sign;
+ src2.a[i] = -2217 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-1.c
new file mode 100644
index 00000000000..38581beaf6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "(vpblendmq|vmovdqa64)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_blend_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-2.c
new file mode 100644
index 00000000000..d12504ff2b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpblendmq-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1LL << i)) ? s2[i] : s1[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 15 + 3467 * i * sign;
+ src2.a[i] = -2217 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-1.c
new file mode 100644
index 00000000000..668db6b81a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile int z;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastd_epi32 (y);
+ x = _mm512_mask_broadcastd_epi32 (x, m, y);
+ x = _mm512_maskz_broadcastd_epi32 (m, y);
+
+ x = _mm512_set1_epi32 (z);
+ x = _mm512_mask_set1_epi32 (x, m, z);
+ x = _mm512_maskz_set1_epi32 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-2.c
new file mode 100644
index 00000000000..efdf5891ab5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastd-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (128, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcastd_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_broadcastd_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastd_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
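+  /* The set1 forms take src.a[0] directly and must match the same broadcast reference. */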
+ res1.x = INTRINSIC (_set1_epi32) (src.a[0]);
+ res2.x = INTRINSIC (_mask_set1_epi32) (res2.x, mask, src.a[0]);
+ res3.x = INTRINSIC (_maskz_set1_epi32) (mask, src.a[0]);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-1.c
new file mode 100644
index 00000000000..7c4698a34dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { target { ! { ia32 } } } } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile long long z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_broadcastq_epi64 (y);
+ x = _mm512_mask_broadcastq_epi64 (x, m, y);
+ x = _mm512_maskz_broadcastq_epi64 (m, y);
+
+ x = _mm512_set1_epi64 (z);
+ x = _mm512_mask_set1_epi64 (x, m, z);
+ x = _mm512_maskz_set1_epi64 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c
new file mode 100644
index 00000000000..7f47b846a1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (128, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 2; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcastq_epi64) (src.x);
+ res2.x = INTRINSIC (_mask_broadcastq_epi64) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastq_epi64) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+
+ res1.x = INTRINSIC (_set1_epi64) (src.a[0]);
+ res2.x = INTRINSIC (_mask_set1_epi64) (res2.x, mask, src.a[0]);
+ res3.x = INTRINSIC (_maskz_set1_epi64) (mask, src.a[0]);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-1.c
new file mode 100644
index 00000000000..7e835db1e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_epi32_mask (x, x, _MM_CMPINT_GE);
+ m = _mm512_mask_cmp_epi32_mask (m, x, x, _MM_CMPINT_NLE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
new file mode 100644
index 00000000000..5c7efdc25aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epi32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epi32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+ UNION_TYPE (AVX512F_LEN, i_d) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+ int s1[16] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 77575,
+ 23455, 166, 5321, 5673};
+ int s2[16] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673};
+
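+  /* Check each of the eight comparison predicates against the scalar relation. */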
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-1.c
new file mode 100644
index 00000000000..834fae79a43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmpeq_epi32_mask (x, x);
+ m = _mm512_mask_cmpeq_epi32_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-2.c
new file mode 100644
index 00000000000..2c69427c6ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (MASK_TYPE *r, int *s1, int *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] == s2[i])
+ *r = *r | (one << i);
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpeq_epi32_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpeq_epi32_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-1.c
new file mode 100644
index 00000000000..8689fe3a0cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcmpeqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmpeq_epi64_mask (x, x);
+ m = _mm512_mask_cmpeq_epi64_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-2.c
new file mode 100644
index 00000000000..1cefd0bfaef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpeqq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (MASK_TYPE *r, long long *s1, long long *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] == s2[i])
+ *r = *r | (one << i);
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE res1, res2, res_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpeq_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpeq_epi64_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-1.c
new file mode 100644
index 00000000000..1be0f8d263b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcmpgtd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmpgt_epi32_mask (x, x);
+ m = _mm512_mask_cmpgt_epi32_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-2.c
new file mode 100644
index 00000000000..59ddef87c3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (MASK_TYPE *r, int *s1, int *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] > s2[i])
+ *r = *r | (one << i);
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpgt_epi32_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpgt_epi32_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-1.c
new file mode 100644
index 00000000000..b94be287ebc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcmpgtq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[0-9\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmpgt_epi64_mask (x, x);
+ m = _mm512_mask_cmpgt_epi64_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-2.c
new file mode 100644
index 00000000000..940c56a8a87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpgtq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (MASK_TYPE *r, long long *s1, long long *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] > s2[i])
+ *r = *r | (one << i);
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE res1, res2, res_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpgt_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpgt_epi64_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-1.c
new file mode 100644
index 00000000000..800140d0325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_epi64_mask (x, x, _MM_CMPINT_NE);
+ m = _mm512_mask_cmp_epi64_mask (m, x, x, _MM_CMPINT_NLT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
new file mode 100644
index 00000000000..39bf70b9f04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epi64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+  UNION_TYPE (AVX512F_LEN, i_q) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ long long s1[8] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241};
+ long long s2[8] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124};
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-1.c
new file mode 100644
index 00000000000..110c0904768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_epu32_mask (x, x, _MM_CMPINT_EQ);
+ m = _mm512_mask_cmp_epu32_mask (m, x, x, _MM_CMPINT_LT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
new file mode 100644
index 00000000000..84853ee74c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epu32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epu32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+ unsigned int s1[16] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 77575,
+ 23455, 166, 5321, 5673};
+ unsigned int s2[16] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673};
+ UNION_TYPE (AVX512F_LEN, i_d) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-1.c
new file mode 100644
index 00000000000..2f79f4dccbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ m = _mm512_cmp_epu64_mask (x, x, _MM_CMPINT_LE);
+ m = _mm512_mask_cmp_epu64_mask (m, x, x, _MM_CMPINT_UNUSED);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
new file mode 100644
index 00000000000..ec33f741696
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epu64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epu64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+static void
+TEST ()
+{
+ UNION_TYPE (AVX512F_LEN, i_q) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+ unsigned long long s1[8] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241};
+ unsigned long long s2[8] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124};
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-1.c
new file mode 100644
index 00000000000..162fa7aef07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_compress_epi32 (x, m, x);
+ x = _mm512_maskz_compress_epi32 (m, x);
+
+ _mm512_mask_compressstoreu_epi32 (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-2.c
new file mode 100644
index 00000000000..4bb589c85ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressd-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define MASK ((1 << SIZE) - 1)
+#include <x86intrin.h>
+
+static void
+CALC (int *s, int *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[k++] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s, res1, res2;
+ int res3[SIZE];
+ MASK_TYPE compressed_mask, mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, mask_bit_count, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345 * (i + 200) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_compress_epi32) (res1.x, mask, s.x);
+ res2.x = INTRINSIC (_maskz_compress_epi32) (mask, s.x);
+ INTRINSIC (_mask_compressstoreu_epi32) (res3, mask, s.x);
+
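+  /* Compression packs selected elements into the low positions, so only the popcount (mask) lowest result elements are defined. */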
+ mask_bit_count = __popcntd (mask & MASK);
+ compressed_mask = (1 << mask_bit_count) - 1;
+ CALC (s.a, res_ref, mask);
+
+ MASK_MERGE (i_d) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, compressed_mask, SIZE);
+ if (checkVi (res3, res_ref, SIZE))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-1.c
new file mode 100644
index 00000000000..3a07ee89bd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_compress_epi64 (x, m, x);
+ x = _mm512_maskz_compress_epi64 (m, x);
+
+ _mm512_mask_compressstoreu_epi64 (p, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-2.c
new file mode 100644
index 00000000000..02527428dcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressq-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define MASK ((1 << SIZE) - 1)
+#include <x86intrin.h>
+
+static void
+CALC (long long *s, long long *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[k++] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s, res1, res2;
+ long long res3[SIZE];
+ MASK_TYPE compressed_mask, mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, mask_bit_count, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 12345 * (i + 200) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_compress_epi64) (res1.x, mask, s.x);
+ res2.x = INTRINSIC (_maskz_compress_epi64) (mask, s.x);
+ INTRINSIC (_mask_compressstoreu_epi64) (res3, mask, s.x);
+
+ mask_bit_count = __popcntd (mask & MASK);
+ compressed_mask = (1 << mask_bit_count) - 1;
+ CALC (s.a, res_ref, mask);
+
+ MASK_MERGE (i_q) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, compressed_mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, compressed_mask, SIZE);
+ if (checkVl (res3, res_ref, SIZE))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-1.c
new file mode 100644
index 00000000000..4b5f8d91a17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutexvar_epi32 (x, x);
+ x = _mm512_maskz_permutexvar_epi32 (m, x, x);
+ x = _mm512_mask_permutexvar_epi32 (x, m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
new file mode 100644
index 00000000000..aa4900af8f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *src1, int *mask, int *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ dst[i] = src1[mask[i] & 15];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = (i + 10) * (i + 10) * sign;
+ src2.a[i] = (i + 30);
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-1.c
new file mode 100644
index 00000000000..0436dfd709b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2d\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask2_permutex2var_epi32 (x, x, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c
new file mode 100644
index 00000000000..a7ac2bfbc62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (int *dst, int *src1, int *ind, int *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
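+      /* The low index bits select the element, the next bit selects the source table. */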
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res, ind;
+ int res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = DEFAULT_VALUE;
+ s1.a[i] = 34 * i + 1;
+ s2.a[i] = 34 * i;
+
+ res.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res.x =
+ INTRINSIC (_mask2_permutex2var_epi32) (s1.x, ind.x, mask, s2.x);
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-1.c
new file mode 100644
index 00000000000..e2b74cc9910
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m512i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask2_permutex2var_pd (x, y, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c
new file mode 100644
index 00000000000..a192a499a60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (double *dst, double *src1, long long *ind, double *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, k;
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res;
+ UNION_TYPE (AVX512F_LEN, i_q) ind;
+ double res_ref[SIZE];
+
+ union
+ {
+ double f;
+ long long i;
+ } ind_copy[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ /* Some of the integer indexes may be interpreted as floating point
+         values in mask-merge mode; that's why we use IND_COPY. */
+ ind.a[i] = ind_copy[i].i = 17 * (i << 1);
+ s1.a[i] = 42.5 * i + 1;
+ s2.a[i] = 22.5 * i;
+
+ res.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res.x = INTRINSIC (_mask2_permutex2var_pd) (s1.x, ind.x, mask, s2.x);
+
+ /* Standard MASK_MERGE cannot be used since VPERMI2PD in mask-merge mode
+     merges vectors of two different types (__m512d and __m512i). */
+ for (k = 0; k < SIZE; k++)
+ res_ref[k] = (mask & (1LL << k)) ? res_ref[k] : ind_copy[k].f;
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-1.c
new file mode 100644
index 00000000000..fc103a90e18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m512i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask2_permutex2var_ps (x, y, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c
new file mode 100644
index 00000000000..5933fc9c890
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (float *dst, float *src1, int *ind, float *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, k;
+ UNION_TYPE (AVX512F_LEN,) s1, s2, res;
+ UNION_TYPE (AVX512F_LEN, i_d) ind;
+ float res_ref[SIZE];
+
+ union
+ {
+ float f;
+ int i;
+ } ind_copy[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ /* Some of the integer indexes may be interpreted as floating point
+         values in mask-merge mode; that's why we use IND_COPY. */
+ ind.a[i] = ind_copy[i].i = 17 * (i << 1);
+ s1.a[i] = 42.5 * i + 1;
+ s2.a[i] = 22.5 * i;
+
+ res.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res.x = INTRINSIC (_mask2_permutex2var_ps) (s1.x, ind.x, mask, s2.x);
+
+ /* Standard MASK_MERGE cannot be used since VPERMI2PS in mask-merge mode
+     merges vectors of two different types (__m512 and __m512i). */
+ for (k = 0; k < SIZE; k++)
+ res_ref[k] = (mask & (1LL << k)) ? res_ref[k] : ind_copy[k].f;
+
+ if (UNION_CHECK (AVX512F_LEN,) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-1.c
new file mode 100644
index 00000000000..7d780b2a3b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2q\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask2_permutex2var_epi64 (x, x, m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c
new file mode 100644
index 00000000000..ea93f9e0554
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (long long *dst, long long *src1, long long *ind, long long *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res, ind;
+ long long res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = DEFAULT_VALUE;
+ s1.a[i] = 34 * i + 1;
+ s2.a[i] = 34 * i;
+
+ res.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res.x =
+ INTRINSIC (_mask2_permutex2var_epi64) (s1.x, ind.x, mask, s2.x);
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-1.c
new file mode 100644
index 00000000000..061a6253591
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m512i c;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutevar_pd (x, c);
+ x = _mm512_mask_permutevar_pd (x, m, x, c);
+ x = _mm512_maskz_permutevar_pd (m, x, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-2.c
new file mode 100644
index 00000000000..e7cce39b3f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpd-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#ifndef CTRL
+#define CTRL 6
+#endif
+
+#undef mask_v
+#define mask_v(pos) (((CTRL & (1ULL << (pos))) >> (pos)) << 1)
+
+static void
+CALC (double *s1, long long *s2, double *r)
+{
+ int i;
+
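+  /* Bit 1 of each control element selects a double within its 128-bit pair. */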
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[(2 * (i / 2)) + ((s2[i] & 0x02) >> 1)];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) s2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 10.;
+ s2.a[i] = mask_v (i);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutevar_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_permutevar_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_permutevar_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-1.c
new file mode 100644
index 00000000000..8b5ffd023af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permute_pd (x, 13);
+ x = _mm512_mask_permute_pd (x, m, x, 13);
+ x = _mm512_maskz_permute_pd (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-2.c
new file mode 100644
index 00000000000..8913a024a17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpdi-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#ifndef CTRL
+#define CTRL 129
+#endif
+
+static void
+CALC (double *s1, int s2, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (s2 & (1 << i)) ? s1[1 + 2 * (i / 2)] : s1[2 * (i / 2)];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 10.;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permute_pd) (s1.x, CTRL);
+ res2.x = INTRINSIC (_mask_permute_pd) (res2.x, mask, s1.x, CTRL);
+ res3.x = INTRINSIC (_maskz_permute_pd) (mask, s1.x, CTRL);
+
+ CALC (s1.a, CTRL, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-1.c
new file mode 100644
index 00000000000..b46182b247d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m512i c;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutevar_ps (x, c);
+ x = _mm512_mask_permutevar_ps (x, m, x, c);
+ x = _mm512_maskz_permutevar_ps (m, x, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-2.c
new file mode 100644
index 00000000000..5f8f24df369
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilps-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#ifndef CTRL
+#define CTRL 233
+#endif
+
+#undef mask_v
+#define mask_v(pos) ((CTRL & (0x3 << (pos))) >> (pos))
+
+static void
+CALC (float *s1, int *s2, float *r)
+{
+ int i;
+
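+  /* Bits 1:0 of each control element select a float within its 128-bit lane. */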
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[(4 * (i / 4)) + (s2[i] & 0x03)];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) s2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 10.;
+ s2.a[i] = mask_v (i);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutevar_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_permutevar_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_permutevar_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-1.c
new file mode 100644
index 00000000000..f09213e03e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permute_ps (x, 13);
+ x = _mm512_mask_permute_ps (x, m, x, 13);
+ x = _mm512_maskz_permute_ps (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-2.c
new file mode 100644
index 00000000000..30866be889c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermilpsi-2.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#ifndef CTRL
+#define CTRL 129
+#endif
+
+#ifndef SELECT4_DEFINED
+#define SELECT4_DEFINED
+static int
+select4 (int i, unsigned ctrl)
+{
+ int res;
+ switch (i % 4)
+ {
+ case 0:
+      res = (ctrl & 0x03);
+      break;
+    case 1:
+      res = ((ctrl & 0x0c) >> 2);
+      break;
+    case 2:
+      res = ((ctrl & 0x30) >> 4);
+      break;
+    case 3:
+      res = ((ctrl & 0xc0) >> 6);
+ break;
+ }
+ return res;
+}
+#endif
+
+static void
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[(4 * (i / 4)) + select4 (i, CTRL)];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 10.;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permute_ps) (s1.x, CTRL);
+ res2.x = INTRINSIC (_mask_permute_ps) (res2.x, mask, s1.x, CTRL);
+ res3.x = INTRINSIC (_maskz_permute_ps) (mask, s1.x, CTRL);
+
+ CALC (s1.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-1.c
new file mode 100644
index 00000000000..d2e8b9c971b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512d y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_permutexvar_pd (x, y);
+ y = _mm512_mask_permutexvar_pd (y, m, x, y);
+ y = _mm512_maskz_permutexvar_pd (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c
new file mode 100644
index 00000000000..4716e672acc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *s1, long long *mask, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      r[i] = s1[mask[i] & (SIZE - 1)];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) src1, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutexvar_pd) (src2.x, src1.x);
+ res2.x = INTRINSIC (_mask_permutexvar_pd) (res2.x, mask, src2.x, src1.x);
+ res3.x = INTRINSIC (_maskz_permutexvar_pd) (mask, src2.x, src1.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-1.c
new file mode 100644
index 00000000000..97fd92c840f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex_pd (x, 13);
+ x = _mm512_mask_permutex_pd (x, m, x, 13);
+ x = _mm512_maskz_permutex_pd (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-2.c
new file mode 100644
index 00000000000..bce4dc7891f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpdi-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define N 0x7c
+
+static void
+CALC (double *s1, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ int index = (N >> ((i % 4) * 2)) & 3;
+ int base = i / 4;
+ r[i] = s1[4 * base + index];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) src1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_permutex_pd) (src1.x, N);
+ res2.x = INTRINSIC (_mask_permutex_pd) (res2.x, mask, src1.x, N);
+ res3.x = INTRINSIC (_maskz_permutex_pd) (mask, src1.x, N);
+
+ CALC (src1.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-1.c
new file mode 100644
index 00000000000..7b7367afad9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512 y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ y = _mm512_permutexvar_ps (x, y);
+ y = _mm512_mask_permutexvar_ps (y, m, x, y);
+ y = _mm512_maskz_permutexvar_ps (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c
new file mode 100644
index 00000000000..57ddb3128e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s1, int *mask, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      r[i] = s1[mask[i] & (SIZE - 1)];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) src1, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutexvar_ps) (src2.x, src1.x);
+ res2.x = INTRINSIC (_mask_permutexvar_ps) (res2.x, mask, src2.x, src1.x);
+ res3.x = INTRINSIC (_maskz_permutexvar_ps) (mask, src2.x, src1.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-1.c
new file mode 100644
index 00000000000..ef0271b612a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex_epi64 (x, 13);
+ x = _mm512_mask_permutex_epi64 (x, m, x, 13);
+ x = _mm512_maskz_permutex_epi64 (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
new file mode 100644
index 00000000000..5701ccdc9ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define IMM_MASK 0x7c
+
+static void
+CALC (long long *src1, int mask, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
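+      /* Each 2-bit immediate field selects one quadword within a 256-bit half. */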
+ int index = ((mask >> (2 * (i % 4))) & 3);
+ int base = i / 4;
+ dst[i] = src1[4 * base + index];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = (i + 10) * (i + 10) * sign;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK);
+ res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK);
+ res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK);
+
+ CALC (src1.a, IMM_MASK, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-1.c
new file mode 100644
index 00000000000..62b28c33d57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutexvar_epi64 (x, x);
+ x = _mm512_maskz_permutexvar_epi64 (m, x, x);
+ x = _mm512_mask_permutexvar_epi64 (x, m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
new file mode 100644
index 00000000000..15d2f2155cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *src1, long long *mask, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      dst[i] = src1[mask[i] & (SIZE - 1)];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = (i + 10) * (i + 10) * sign;
+ src2.a[i] = 2 * i + 10;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c
new file mode 100644
index 00000000000..892b5710dad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex2var_epi32 (x, x, x);
+ x = _mm512_mask_permutex2var_epi32 (x, m, x, x);
+ x = _mm512_maskz_permutex2var_epi32 (m, x, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c
new file mode 100644
index 00000000000..4459431062c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
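+/* Bit log2(SIZE) of each index selects between the two source tables;
+   the lower bits select the element within that table.  */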
+static void
+CALC (int *dst, int *src1, int *ind, int *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, ind;
+ int res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = 17 * (i << 1);
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 34 * i;
+
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res1.x = INTRINSIC (_permutex2var_epi32) (s1.x, ind.x, s2.x);
+ res2.x =
+ INTRINSIC (_mask_permutex2var_epi32) (s1.x, mask, ind.x, s2.x);
+ res3.x =
+ INTRINSIC (_maskz_permutex2var_epi32) (mask, s1.x, ind.x, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c
new file mode 100644
index 00000000000..01595233f9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m512i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex2var_pd (x, y, x);
+ x = _mm512_mask_permutex2var_pd (x, m, y, x);
+ x = _mm512_maskz_permutex2var_pd (m, x, y, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c
new file mode 100644
index 00000000000..c4e225906a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (double *dst, double *src1, long long *ind, double *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) ind;
+ double res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = 17 * (i << 1);
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 22.5 * i;
+
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res1.x = INTRINSIC (_permutex2var_pd) (s1.x, ind.x, s2.x);
+ res2.x = INTRINSIC (_mask_permutex2var_pd) (s1.x, mask, ind.x, s2.x);
+ res3.x =
+ INTRINSIC (_maskz_permutex2var_pd) (mask, s1.x, ind.x, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c
new file mode 100644
index 00000000000..f315055da84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m512i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex2var_ps (x, y, x);
+ x = _mm512_mask_permutex2var_ps (x, m, y, x);
+ x = _mm512_maskz_permutex2var_ps (m, x, y, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c
new file mode 100644
index 00000000000..dfd493e866f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (float *dst, float *src1, int *ind, float *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+  UNION_TYPE (AVX512F_LEN, ) s1, s2, res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) ind;
+ float res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = 17 * (i << 1);
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 22.5 * i;
+
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res1.x = INTRINSIC (_permutex2var_ps) (s1.x, ind.x, s2.x);
+ res2.x = INTRINSIC (_mask_permutex2var_ps) (s1.x, mask, ind.x, s2.x);
+ res3.x =
+ INTRINSIC (_maskz_permutex2var_ps) (mask, s1.x, ind.x, s2.x);
+
+  if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+  MASK_MERGE () (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+  MASK_ZERO () (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c
new file mode 100644
index 00000000000..65f4afb0406
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_permutex2var_epi64 (x, x, x);
+ x = _mm512_mask_permutex2var_epi64 (x, m, x, x);
+ x = _mm512_maskz_permutex2var_epi64 (m, x, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c
new file mode 100644
index 00000000000..1772996f40f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+#include "values.h"
+
+static void
+CALC (long long *dst, long long *src1, long long *ind, long long *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3, ind;
+ long long res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ ind.a[i] = 17 * (i << 1);
+ s1.a[i] = DEFAULT_VALUE;
+ s2.a[i] = 34 * i;
+
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res1.x = INTRINSIC (_permutex2var_epi64) (s1.x, ind.x, s2.x);
+ res2.x =
+ INTRINSIC (_mask_permutex2var_epi64) (s1.x, mask, ind.x, s2.x);
+ res3.x =
+ INTRINSIC (_maskz_permutex2var_epi64) (mask, s1.x, ind.x, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-1.c
new file mode 100644
index 00000000000..c70a2abc9ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_expand_epi32 (x, m, x);
+ x = _mm512_maskz_expand_epi32 (m, x);
+
+ x = _mm512_mask_expandloadu_epi32 (x, m, p);
+ x = _mm512_maskz_expandloadu_epi32 (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-2.c
new file mode 100644
index 00000000000..91f0e7bc89f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandd-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
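+/* Expand: successive source elements are written to the destination
+   positions whose mask bit is set.  */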
+static void
+CALC (int *s, int *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[i] = s[k++];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ int s2[SIZE];
+ int res_ref1[SIZE];
+ int res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12345 * (i + 200) * sign;
+ s2[i] = 67890 * (i + 300) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_expand_epi32) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_expand_epi32) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_expandloadu_epi32) (res3.x, mask, s2);
+ res4.x = INTRINSIC (_maskz_expandloadu_epi32) (mask, s2);
+
+ CALC (s1.a, res_ref1, mask);
+ CALC (s2, res_ref2, mask);
+
+ MASK_MERGE (i_d) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-1.c
new file mode 100644
index 00000000000..fc477f209a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mask_expand_epi64 (x, m, x);
+ x = _mm512_maskz_expand_epi64 (m, x);
+
+ x = _mm512_mask_expandloadu_epi64 (x, m, p);
+ x = _mm512_maskz_expandloadu_epi64 (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-2.c
new file mode 100644
index 00000000000..8385d746386
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandq-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s, long long *r, MASK_TYPE mask)
+{
+ int i, k;
+
+ for (i = 0, k = 0; i < SIZE; i++)
+ {
+ if (mask & (1 << i))
+ r[i] = s[k++];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ long long s2[SIZE];
+ long long res_ref1[SIZE];
+ long long res_ref2[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 12345 * (i + 200) * sign;
+ s2[i] = 67890 * (i + 300) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_expand_epi64) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_expand_epi64) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_expandloadu_epi64) (res3.x, mask, s2);
+ res4.x = INTRINSIC (_maskz_expandloadu_epi64) (mask, s2);
+
+ CALC (s1.a, res_ref1, mask);
+ CALC (s2, res_ref2, mask);
+
+ MASK_MERGE (i_q) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref1))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref1, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref1))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref2))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref2, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res4, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-1.c
new file mode 100644
index 00000000000..2c88e92bd85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_epi32 (x, x);
+ x = _mm512_mask_max_epi32 (x, m, x, x);
+ x = _mm512_maskz_max_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-2.c
new file mode 100644
index 00000000000..7322e787c77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *src1, int *src2, int *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 2000) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epi32) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-1.c
new file mode 100644
index 00000000000..e15fa2ab3d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_epi64 (x, x);
+ x = _mm512_mask_max_epi64 (x, m, x, x);
+ x = _mm512_maskz_max_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-2.c
new file mode 100644
index 00000000000..b3c6c03d85c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxsq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *src1, long long *src2, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 2000) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epi64) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-1.c
new file mode 100644
index 00000000000..321992ac102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_epu32 (x, x);
+ x = _mm512_mask_max_epu32 (x, m, x, x);
+ x = _mm512_maskz_max_epu32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-2.c
new file mode 100644
index 00000000000..2386f41d248
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxud-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *src1, unsigned *src2,
+ unsigned *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i;
+ src2.a[i] = (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epu32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epu32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epu32) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-1.c
new file mode 100644
index 00000000000..2cf8b4cc458
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_max_epu64 (x, x);
+ x = _mm512_mask_max_epu64 (x, m, x, x);
+ x = _mm512_maskz_max_epu64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-2.c
new file mode 100644
index 00000000000..59eefb7bca7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmaxuq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned long long *src1, unsigned long long *src2,
+ unsigned long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i;
+ src2.a[i] = (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epu64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epu64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epu64) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-1.c
new file mode 100644
index 00000000000..2beffc6e2b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_epi32 (x, x);
+ x = _mm512_mask_min_epi32 (x, m, x, x);
+ x = _mm512_maskz_min_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-2.c
new file mode 100644
index 00000000000..3c3d5e80cba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminsd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *src1, int *src2, int *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 2000) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epi32) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-1.c
new file mode 100644
index 00000000000..8270307fb24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_epi64 (x, x);
+ x = _mm512_mask_min_epi64 (x, m, x, x);
+ x = _mm512_maskz_min_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-2.c
new file mode 100644
index 00000000000..e2f9c52bab7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminsq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *src1, long long *src2, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 2000) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epi64) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminud-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminud-1.c
new file mode 100644
index 00000000000..6cbb9d631f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminud-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_epu32 (x, x);
+ x = _mm512_mask_min_epu32 (x, m, x, x);
+ x = _mm512_maskz_min_epu32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminud-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminud-2.c
new file mode 100644
index 00000000000..3daa44d7d4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminud-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *src1, unsigned *src2,
+ unsigned *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i;
+ src2.a[i] = i + 20;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epu32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epu32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epu32) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-1.c
new file mode 100644
index 00000000000..816c11bccaf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_min_epu64 (x, x);
+ x = _mm512_mask_min_epu64 (x, m, x, x);
+ x = _mm512_maskz_min_epu64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-2.c
new file mode 100644
index 00000000000..c1dfb9ea084
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpminuq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned long long *src1, unsigned long long *src2,
+ unsigned long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i;
+ src2.a[i] = (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epu64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epu64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epu64) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-1.c
new file mode 100644
index 00000000000..c634d888d68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi32_epi8 (s);
+ res = _mm512_mask_cvtepi32_epi8 (res, m, s);
+ res = _mm512_maskz_cvtepi32_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c
new file mode 100644
index 00000000000..8a0c25f1829
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
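+/* Truncate each doubleword to a byte; result elements past SIZE are
+   zeroed.  */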
+static void
+CALC (char *r, int *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ r[i] = (i < SIZE) ? (char) s[i] : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[16];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi32_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi32_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi32_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-1.c
new file mode 100644
index 00000000000..bd66defe6c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi32_epi16 (s);
+ res = _mm512_mask_cvtepi32_epi16 (res, m, s);
+ res = _mm512_maskz_cvtepi32_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c
new file mode 100644
index 00000000000..6bc5f876599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 16)
+
+static void
+CALC (short *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ r[i] = (i < SIZE) ? (short) s[i] : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE_HALF];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi32_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi32_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi32_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-1.c
new file mode 100644
index 00000000000..7fc5980c3a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi64_epi8 (s);
+ res = _mm512_mask_cvtepi64_epi8 (res, m, s);
+ res = _mm512_maskz_cvtepi64_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c
new file mode 100644
index 00000000000..f1f1e9096bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (char *r, long long *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ r[i] = (i < SIZE) ? (char) s[i] : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[16];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi64_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi64_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi64_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-1.c
new file mode 100644
index 00000000000..f0f80b10175
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi64_epi32 (s);
+ res = _mm512_mask_cvtepi64_epi32 (res, m, s);
+ res = _mm512_maskz_cvtepi64_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c
new file mode 100644
index 00000000000..cf1cb27a4bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 32)
+
+static void
+CALC (int *r, long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ r[i] = (i < SIZE) ? (int) s[i] : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE_HALF];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi64_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi64_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi64_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-1.c
new file mode 100644
index 00000000000..20011ee75a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi64_epi16 (s);
+ res = _mm512_mask_cvtepi64_epi16 (res, m, s);
+ res = _mm512_maskz_cvtepi64_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c
new file mode 100644
index 00000000000..07a3789c822
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (short *r, long long *s)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ {
+ r[i] = (i < SIZE) ? (short) s[i] : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[8];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi64_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi64_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi64_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-1.c
new file mode 100644
index 00000000000..c14dc04e004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtsepi32_epi8 (s);
+ res = _mm512_mask_cvtsepi32_epi8 (res, m, s);
+ res = _mm512_maskz_cvtsepi32_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c
new file mode 100644
index 00000000000..03a0e39b900
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
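+/* Saturate each doubleword to the signed char range; result elements
+   past SIZE are zeroed.  */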
+static void
+CALC (char *r, int *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+      if (i >= SIZE)
+	r[i] = 0;
+      else if (s[i] < CHAR_MIN)
+	r[i] = CHAR_MIN;
+      else if (s[i] > CHAR_MAX)
+	r[i] = CHAR_MAX;
+      else
+	r[i] = s[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[16];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi32_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi32_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi32_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-1.c
new file mode 100644
index 00000000000..4984626ae06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtsepi32_epi16 (s);
+ res = _mm512_mask_cvtsepi32_epi16 (res, m, s);
+ res = _mm512_maskz_cvtsepi32_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c
new file mode 100644
index 00000000000..3baac3d2de5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 16)
+#include <limits.h>
+
+static void
+CALC (short *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+      if (i >= SIZE)
+	r[i] = 0;
+      else if (s[i] < SHRT_MIN)
+	r[i] = SHRT_MIN;
+      else if (s[i] > SHRT_MAX)
+	r[i] = SHRT_MAX;
+      else
+	r[i] = s[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE_HALF];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi32_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi32_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi32_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-1.c
new file mode 100644
index 00000000000..bcd6992b902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtsepi64_epi8 (s);
+ res = _mm512_mask_cvtsepi64_epi8 (res, m, s);
+ res = _mm512_maskz_cvtsepi64_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c
new file mode 100644
index 00000000000..183476de857
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+static void
+CALC (char *r, long long *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+      if (i >= SIZE)
+	r[i] = 0;
+      else if (s[i] < CHAR_MIN)
+	r[i] = CHAR_MIN;
+      else if (s[i] > CHAR_MAX)
+	r[i] = CHAR_MAX;
+      else
+	r[i] = s[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[16];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi64_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi64_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi64_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-1.c
new file mode 100644
index 00000000000..acec81bb32d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtsepi64_epi32 (s);
+ res = _mm512_mask_cvtsepi64_epi32 (res, m, s);
+ res = _mm512_maskz_cvtsepi64_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c
new file mode 100644
index 00000000000..7876c783ba7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 32)
+#include <limits.h>
+
+static void CALC (int *r, long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ if (s[i] < INT_MIN)
+ r[i] = INT_MIN;
+ else if (s[i] > INT_MAX)
+ r[i] = INT_MAX;
+ else
+ r[i] = s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE_HALF];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi64_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi64_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi64_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-1.c
new file mode 100644
index 00000000000..2952aca0764
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtsepi64_epi16 (s);
+ res = _mm512_mask_cvtsepi64_epi16 (res, m, s);
+ res = _mm512_maskz_cvtsepi64_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c
new file mode 100644
index 00000000000..949683216ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+static void CALC (short *r, long long *s)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ {
+ if (s[i] < SHRT_MIN)
+ r[i] = SHRT_MIN;
+ else if (s[i] > SHRT_MAX)
+ r[i] = SHRT_MAX;
+ else
+ r[i] = s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[8];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi64_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi64_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi64_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-1.c
new file mode 100644
index 00000000000..18a34ae0120
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi8_epi32 (s);
+ res = _mm512_mask_cvtepi8_epi32 (res, m, s);
+ res = _mm512_maskz_cvtepi8_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-2.c
new file mode 100644
index 00000000000..f3d425ce6be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
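+/* Reference: sign-extend each byte of the 128-bit source to a 32-bit lane.  */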
+static void
+CALC (char *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_b s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 8 * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi8_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi8_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi8_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-1.c
new file mode 100644
index 00000000000..e902b6e764e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi8_epi64 (s);
+ res = _mm512_mask_cvtepi8_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepi8_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-2.c
new file mode 100644
index 00000000000..3cbde261d5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxbq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (char *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (long long int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_b s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 8 * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi8_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi8_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi8_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-1.c
new file mode 100644
index 00000000000..265c9fe3237
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi32_epi64 (s);
+ res = _mm512_mask_cvtepi32_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepi32_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-2.c
new file mode 100644
index 00000000000..d9122ff4587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxdq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (long long int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi32_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi32_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi32_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-1.c
new file mode 100644
index 00000000000..cdcba564eae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi16_epi32 (s);
+ res = _mm512_mask_cvtepi16_epi32 (res, m, s);
+ res = _mm512_maskz_cvtepi16_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-2.c
new file mode 100644
index 00000000000..16d81eb49e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (short *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi16_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi16_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi16_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-1.c
new file mode 100644
index 00000000000..28d6b17ba2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepi16_epi64 (s);
+ res = _mm512_mask_cvtepi16_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepi16_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-2.c
new file mode 100644
index 00000000000..f56633e8601
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsxwq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (short *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (long long int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_w s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi16_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi16_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi16_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-1.c
new file mode 100644
index 00000000000..90019f3f39d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtusepi32_epi8 (s);
+ res = _mm512_mask_cvtusepi32_epi8 (res, m, s);
+ res = _mm512_maskz_cvtusepi32_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c
new file mode 100644
index 00000000000..3dac2ad3462
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
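+/* Reference: unsigned-saturate each 32-bit lane to 8 bits; lanes past SIZE stay zero.  */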
+static void CALC (unsigned char *r, unsigned int *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ r[i] = (s[i] > UCHAR_MAX) ? UCHAR_MAX : s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[16];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi32_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi32_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi32_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-1.c
new file mode 100644
index 00000000000..0ee86c982c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtusepi32_epi16 (s);
+ res = _mm512_mask_cvtusepi32_epi16 (res, m, s);
+ res = _mm512_maskz_cvtusepi32_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c
new file mode 100644
index 00000000000..aeffedb6e48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 16)
+#include <limits.h>
+
+static void CALC (unsigned short *r, unsigned int *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ r[i] = (s[i] > USHRT_MAX) ? USHRT_MAX : s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_HALF];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi32_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi32_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi32_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-1.c
new file mode 100644
index 00000000000..4da4076bcb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtusepi64_epi8 (s);
+ res = _mm512_mask_cvtusepi64_epi8 (res, m, s);
+ res = _mm512_maskz_cvtusepi64_epi8 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c
new file mode 100644
index 00000000000..17389083564
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+static void CALC (unsigned char *r, unsigned long long *s)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ {
+ unsigned long long v = (i < SIZE) ? s[i] : 0;
+ r[i] = (v > UCHAR_MAX) ? UCHAR_MAX : v;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (128, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[16];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi64_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi64_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi64_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-1.c
new file mode 100644
index 00000000000..69520d91d91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m256i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtusepi64_epi32 (s);
+ res = _mm512_mask_cvtusepi64_epi32 (res, m, s);
+ res = _mm512_maskz_cvtusepi64_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c
new file mode 100644
index 00000000000..a8dda5cd78f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define SIZE_HALF (AVX512F_LEN_HALF / 32)
+#include <limits.h>
+
+static void CALC (unsigned int *r, unsigned long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ r[i] = (s[i] > UINT_MAX) ? UINT_MAX : s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[SIZE_HALF];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi64_epi32) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi64_epi32) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi64_epi32) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-1.c
new file mode 100644
index 00000000000..e6ca2283c4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtusepi64_epi16 (s);
+ res = _mm512_mask_cvtusepi64_epi16 (res, m, s);
+ res = _mm512_maskz_cvtusepi64_epi16 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c
new file mode 100644
index 00000000000..557aee92029
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+static void CALC (unsigned short *r, unsigned long long *s)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ {
+ r[i] = (s[i] > USHRT_MAX) ? USHRT_MAX : s[i];
+ r[i] = (i < SIZE) ? r[i] : 0;
+ }
+}
+
+static void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (128, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[8];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi64_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi64_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi64_epi16) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (128, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (128, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-1.c
new file mode 100644
index 00000000000..6b4976dca71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu8_epi32 (s);
+ res = _mm512_mask_cvtepu8_epi32 (res, m, s);
+ res = _mm512_maskz_cvtepu8_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-2.c
new file mode 100644
index 00000000000..ab1c3f1087a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
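+/* Reference: zero-extend each unsigned byte of the 128-bit source to a 32-bit lane.  */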
+static void
+CALC (unsigned char *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_b s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 16 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu8_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu8_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu8_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-1.c
new file mode 100644
index 00000000000..758e06654f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu8_epi64 (s);
+ res = _mm512_mask_cvtepu8_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepu8_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-2.c
new file mode 100644
index 00000000000..8f3ce8c34fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxbq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned char *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_b s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 16 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu8_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu8_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu8_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-1.c
new file mode 100644
index 00000000000..1a8c37a13c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu32_epi64 (s);
+ res = _mm512_mask_cvtepu32_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepu32_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-2.c
new file mode 100644
index 00000000000..ca9a68bf404
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxdq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu32_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu32_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu32_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-1.c
new file mode 100644
index 00000000000..6f955585428
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m512i res;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu16_epi32 (s);
+ res = _mm512_mask_cvtepu16_epi32 (res, m, s);
+ res = _mm512_maskz_cvtepu16_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-2.c
new file mode 100644
index 00000000000..aa84b94dfff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned short *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_w) s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu16_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu16_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu16_epi32) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-1.c
new file mode 100644
index 00000000000..13f893e6300
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m512i res;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm512_cvtepu16_epi64 (s);
+ res = _mm512_mask_cvtepu16_epi64 (res, m, s);
+ res = _mm512_maskz_cvtepu16_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-2.c
new file mode 100644
index 00000000000..292bc3d589a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovzxwq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned short *s, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (long long int) s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ union128i_w s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long int res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 2000 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu16_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu16_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu16_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-1.c
new file mode 100644
index 00000000000..091de8c3933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mul_epi32 (x, x);
+ x = _mm512_mask_mul_epi32 (x, m, x, x);
+ x = _mm512_maskz_mul_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-2.c
new file mode 100644
index 00000000000..7c00edb94a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmuldq-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define DST_SIZE (AVX512F_LEN / 64)
+
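+/* Reference: multiply the even (low) 32-bit element of each 64-bit lane, sign-extended to 64 bits.  */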
+static void
+CALC (int *s1, int *s2, long long int *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ r[i] = (long long) s1[i * 2] * s2[i * 2];
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i * 20;
+ s2.a[i] = i + 20;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (s1.a, s2.a, res_ref);
+
+ res1.x = INTRINSIC (_mul_epi32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_mul_epi32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_mul_epi32) (mask, s1.x, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-1.c
new file mode 100644
index 00000000000..d1d77d2e4a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 mx;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mullo_epi32 (x, x);
+ x = _mm512_mask_mullo_epi32 (x, mx, x, x);
+ x = _mm512_maskz_mullo_epi32 (mx, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-2.c
new file mode 100644
index 00000000000..a097c09ac6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmulld-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *src1, int *src2, int *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] * src2[i];
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int dst_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i + 50;
+ src2.a[i] = i + 100;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_mullo_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_mullo_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_mullo_epi32) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-1.c
new file mode 100644
index 00000000000..b6fd4daf753
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_mul_epu32 (x, x);
+ x = _mm512_mask_mul_epu32 (x, m, x, x);
+ x = _mm512_maskz_mul_epu32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-2.c
new file mode 100644
index 00000000000..bc1a24926f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmuludq-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define DST_SIZE (AVX512F_LEN / 64)
+
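+/* Reference: multiply the even (low) 32-bit element of each 64-bit lane as unsigned 64-bit values.  */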
+static void
+CALC (unsigned int *s1, unsigned int *s2, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ r[i] = (unsigned long long) s1[i * 2] * s2[i * 2];
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i * 20;
+ s2.a[i] = i + 20;
+ }
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (s1.a, s2.a, res_ref);
+
+ res1.x = INTRINSIC (_mul_epu32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_mul_epu32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_mul_epu32) (mask, s1.x, s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpord-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpord-1.c
new file mode 100644
index 00000000000..78650751cb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpord-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_or_si512 (x, x);
+ x = _mm512_or_epi32 (x, x);
+ x = _mm512_mask_or_epi32 (x, m, x, x);
+ x = _mm512_maskz_or_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c
new file mode 100644
index 00000000000..c53c59244cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] | s2[i];
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_or_si512) (s1.x, s2.x);
+ res2.x = INTRINSIC (_or_epi32) (s1.x, s2.x);
+ res3.x = INTRINSIC (_mask_or_epi32) (res3.x, mask, s1.x, s2.x);
+ res4.x = INTRINSIC (_maskz_or_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vporq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vporq-1.c
new file mode 100644
index 00000000000..c6f8bb576ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vporq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_or_epi64 (x, x);
+ x = _mm512_mask_or_epi64 (x, m, x, x);
+ x = _mm512_maskz_or_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c
new file mode 100644
index 00000000000..0ad20235018
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] | s2[i];
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_or_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_or_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_or_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprold-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprold-1.c
new file mode 100644
index 00000000000..4a98e199251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprold-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rol_epi32 (x, 12);
+ x = _mm512_mask_rol_epi32 (x, m, x, 12);
+ x = _mm512_maskz_rol_epi32 (m, x, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprold-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprold-2.c
new file mode 100644
index 00000000000..1c83c526853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprold-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#define N 0x5
+
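+/* Reference: rotate each 32-bit lane left by the immediate count.  */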
+static void
+CALC (int *s1, int count, int *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] << count) | (s1[i] >> (sizeof (s1[i]) * 8 - count));
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rol_epi32) (s1.x, N);
+ res2.x = INTRINSIC (_mask_rol_epi32) (res2.x, mask, s1.x, N);
+ res3.x = INTRINSIC (_maskz_rol_epi32) (mask, s1.x, N);
+
+ CALC (s1.a, N, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolq-1.c
new file mode 100644
index 00000000000..91b2462ac8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rol_epi64 (x, 12);
+ x = _mm512_mask_rol_epi64 (x, m, x, 12);
+ x = _mm512_maskz_rol_epi64 (m, x, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolq-2.c
new file mode 100644
index 00000000000..d436afb7619
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define N 0x5
+
+static void
+CALC (long long *s1, int count, long long *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] << count) | (s1[i] >> (sizeof (s1[i]) * 8 - count));
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rol_epi64) (s1.x, N);
+ res2.x = INTRINSIC (_mask_rol_epi64) (res2.x, mask, s1.x, N);
+ res3.x = INTRINSIC (_maskz_rol_epi64) (mask, s1.x, N);
+
+ CALC (s1.a, N, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-1.c
new file mode 100644
index 00000000000..10331a61dc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rolv_epi32 (x, x);
+ x = _mm512_mask_rolv_epi32 (x, m, x, x);
+ x = _mm512_maskz_rolv_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-2.c
new file mode 100644
index 00000000000..acff91efd11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolvd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
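+/* Reference: rotate each 32-bit lane left by the per-lane count in s2, in unsigned arithmetic to avoid signed-overflow UB.  */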
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((unsigned) s1[i] << s2[i]) | ((unsigned) s1[i] >> (sizeof (s1[i]) * 8 - s2[i]));
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ s2.a[i] = (i + 7);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rolv_epi32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_rolv_epi32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_rolv_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-1.c
new file mode 100644
index 00000000000..a182a620324
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rolv_epi64 (x, x);
+ x = _mm512_mask_rolv_epi64 (x, m, x, x);
+ x = _mm512_maskz_rolv_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-2.c
new file mode 100644
index 00000000000..b8240141ad9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprolvq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] << s2[i]) | (s1[i] >> sizeof (s1[i]) * 8 - s2[i]);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ s2.a[i] = (i + 7);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rolv_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_rolv_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_rolv_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
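
For reference, a hypothetical standalone use of the variable-rotate
intrinsic exercised above (not part of the patch; compile with
-mavx512f and run on an AVX-512F machine):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  union { __m512i x; unsigned int a[16]; } u;
  __m512i v = _mm512_set1_epi32 (0x80000001);
  __m512i c = _mm512_set1_epi32 (1);

  /* Rotate every 32-bit lane left by its per-lane count.  */
  u.x = _mm512_rolv_epi32 (v, c);
  printf ("%x\n", u.a[0]); /* prints "3" */
  return 0;
}
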
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprord-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprord-1.c
new file mode 100644
index 00000000000..c1cf8a5f0d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprord-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_ror_epi32 (x, 12);
+ x = _mm512_mask_ror_epi32 (x, m, x, 12);
+ x = _mm512_maskz_ror_epi32 (m, x, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprord-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprord-2.c
new file mode 100644
index 00000000000..a3c95b9caed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprord-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#define N 0x5
+
+static void
+CALC (int *s1, int count, int *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] >> count) | (s1[i] << sizeof (s1[i]) * 8 - count);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_ror_epi32) (s1.x, N);
+ res2.x = INTRINSIC (_mask_ror_epi32) (res2.x, mask, s1.x, N);
+ res3.x = INTRINSIC (_maskz_ror_epi32) (mask, s1.x, N);
+
+ CALC (s1.a, N, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorq-1.c
new file mode 100644
index 00000000000..66b9e0391c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_ror_epi64 (x, 12);
+ x = _mm512_mask_ror_epi64 (x, m, x, 12);
+ x = _mm512_maskz_ror_epi64 (m, x, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorq-2.c
new file mode 100644
index 00000000000..448d23e329e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define N 0x5
+
+static void
+CALC (long long *s1, int count, long long *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] >> count) | (s1[i] << sizeof (s1[i]) * 8 - count);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_ror_epi64) (s1.x, N);
+ res2.x = INTRINSIC (_mask_ror_epi64) (res2.x, mask, s1.x, N);
+ res3.x = INTRINSIC (_maskz_ror_epi64) (mask, s1.x, N);
+
+ CALC (s1.a, N, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-1.c
new file mode 100644
index 00000000000..59f0c95e278
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vprorvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rorv_epi32 (x, x);
+ x = _mm512_mask_rorv_epi32 (x, m, x, x);
+ x = _mm512_maskz_rorv_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-2.c
new file mode 100644
index 00000000000..ebc4128c66a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorvd-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] >> s2[i]) | (s1[i] << sizeof (s1[i]) * 8 - s2[i]);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ s2.a[i] = (i + 7);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rorv_epi32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_rorv_epi32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_rorv_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-1.c
new file mode 100644
index 00000000000..31b59b18887
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rorv_epi64 (x, x);
+ x = _mm512_mask_rorv_epi64 (x, m, x, x);
+ x = _mm512_maskz_rorv_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-2.c
new file mode 100644
index 00000000000..6f98a7419f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vprorvq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (s1[i] >> s2[i]) | (s1[i] << sizeof (s1[i]) * 8 - s2[i]);
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ unsigned int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 137 * i;
+ s2.a[i] = (i + 7);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rorv_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_rorv_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_rorv_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
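
A rotate right by n equals a rotate left by width - n, which is why the
vpror* references above mirror the vprol* ones.  A small standalone
check (not part of the patch):

#include <assert.h>
#include <stdio.h>

static unsigned int
rol32 (unsigned int x, int n) /* valid for 0 < n < 32 */
{
  return (x << n) | (x >> (32 - n));
}

static unsigned int
ror32 (unsigned int x, int n) /* valid for 0 < n < 32 */
{
  return (x >> n) | (x << (32 - n));
}

int
main (void)
{
  assert (ror32 (0xdeadbeef, 5) == rol32 (0xdeadbeef, 27));
  puts ("ok");
  return 0;
}
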
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-1.c
new file mode 100644
index 00000000000..9b7afc54e85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_epi32 (x, _MM_PERM_AADB);
+ x = _mm512_mask_shuffle_epi32 (x, 2, x, _MM_PERM_AADB);
+ x = _mm512_maskz_shuffle_epi32 (2, x, _MM_PERM_AADB);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-2.c
new file mode 100644
index 00000000000..a1769819e43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshufd-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s1, unsigned char imm, int *r)
+{
+ int i, j, offset;
+
+ for (j = 0; j < SIZE / 4; j++)
+ {
+ offset = j * 4;
+ for (i = 0; i < 4; i++)
+ r[i + offset] =
+ s1[((imm & (0x3 << (2 * i))) >> (2 * i)) + offset];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
+ int res_ref[SIZE];
+ int j, sign = 1;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = 137 * j * sign;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_shuffle_epi32) (s1.x, 0xec);
+ res2.x = INTRINSIC (_mask_shuffle_epi32) (res2.x, mask, s1.x, 0xec);
+ res3.x = INTRINSIC (_maskz_shuffle_epi32) (mask, s1.x, 0xec);
+
+ CALC (s1.a, 0xec, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
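
The shuffle reference above decodes the imm8 two bits at a time: within
each 128-bit lane, element i of the result is the source element
selected by bits [2i+1:2i].  A scalar sketch of that selector with the
0xec immediate the test uses (not part of the patch):

#include <stdio.h>

int
main (void)
{
  int lane[4] = { 10, 11, 12, 13 };
  unsigned char imm = 0xec; /* binary 11 10 11 00 -> picks 0, 3, 2, 3 */
  int i;

  for (i = 0; i < 4; i++)
    printf ("%d ", lane[(imm >> (2 * i)) & 3]); /* prints "10 13 12 13" */
  printf ("\n");
  return 0;
}
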
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpslld-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpslld-1.c
new file mode 100644
index 00000000000..a2c3711df58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpslld-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sll_epi32 (x, y);
+ x = _mm512_mask_sll_epi32 (x, m, x, y);
+ x = _mm512_maskz_sll_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpslld-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpslld-2.c
new file mode 100644
index 00000000000..c8215bd4c0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpslld-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ int count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 32 ? (s1[i] << count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ UNION_TYPE (128, i_d) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ long long imm;
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (imm = 1; imm <= 33; imm++)
+ {
+ src2.a[0] = imm;
+ src2.a[1] = 0;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sll_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sll_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sll_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+ }
+}
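
Unlike the immediate forms, vpslld takes its count from the low 64 bits
of the XMM operand (hence clearing src2.a[1] above), and any count of
32 or more clears the result rather than being reduced modulo the
width; that is what the clamp in CALC and the loop up to 33 exercise.
A scalar sketch of the clamped shift (not part of the patch):

#include <stdio.h>

static int
sll32 (int x, long long count)
{
  /* Counts of 32 or more shift everything out.  */
  return count < 32 ? x << count : 0;
}

int
main (void)
{
  printf ("%d %d\n", sll32 (1, 4), sll32 (1, 33)); /* prints "16 0" */
  return 0;
}
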
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-1.c
new file mode 100644
index 00000000000..c81ac920027
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_slli_epi32 (x, y);
+ x = _mm512_mask_slli_epi32 (x, m, x, y);
+ x = _mm512_maskz_slli_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-2.c
new file mode 100644
index 00000000000..bdcf8c12826
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpslldi-2.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 32 ? (s1[i] << count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi32) (src1.x, 2);
+ res2.x = INTRINSIC (_mask_slli_epi32) (res2.x, mask, src1.x, 2);
+ res3.x = INTRINSIC (_maskz_slli_epi32) (mask, src1.x, 2);
+
+ CALC (res_ref, src1.a, 2);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi32) (src1.x, 33);
+ res2.x = INTRINSIC (_mask_slli_epi32) (res2.x, mask, src1.x, 33);
+ res3.x = INTRINSIC (_maskz_slli_epi32) (mask, src1.x, 33);
+
+ CALC (res_ref, src1.a, 33);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-1.c
new file mode 100644
index 00000000000..491234e882b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sll_epi64 (x, y);
+ x = _mm512_mask_sll_epi64 (x, m, x, y);
+ x = _mm512_maskz_sll_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-2.c
new file mode 100644
index 00000000000..ea33d3df3f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllq-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ long long count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 64 ? (s1[i] << count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ UNION_TYPE (128, i_q) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ long long imm;
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (imm = 1; imm <= 65; imm++)
+ {
+ src2.a[0] = imm;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sll_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sll_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sll_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+ }
+}
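
As the CALC above reads only s2[0]: for the XMM-count forms the shift
amount comes from the low 64 bits of the count operand, not per
element.  A hypothetical standalone use (not part of the patch; needs
-mavx512f and AVX-512F hardware):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  union { __m512i x; long long a[8]; } u;
  __m512i v = _mm512_set1_epi64 (1);
  __m128i c = _mm_set_epi64x (0, 3); /* only the low quadword is the count */

  u.x = _mm512_sll_epi64 (v, c);
  printf ("%lld\n", u.a[0]); /* prints "8" */
  return 0;
}
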
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-1.c
new file mode 100644
index 00000000000..7e077e41b83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_slli_epi64 (x, y);
+ x = _mm512_mask_slli_epi64 (x, m, x, y);
+ x = _mm512_maskz_slli_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-2.c
new file mode 100644
index 00000000000..c87d9e23be6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllqi-2.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 64 ? (s1[i] << count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi64) (src1.x, 3);
+ res2.x = INTRINSIC (_mask_slli_epi64) (res2.x, mask, src1.x, 3);
+ res3.x = INTRINSIC (_maskz_slli_epi64) (mask, src1.x, 3);
+
+ CALC (res_ref, src1.a, 3);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi64) (src1.x, 65);
+ res2.x = INTRINSIC (_mask_slli_epi64) (res2.x, mask, src1.x, 65);
+ res3.x = INTRINSIC (_maskz_slli_epi64) (mask, src1.x, 65);
+
+ CALC (res_ref, src1.a, 65);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-1.c
new file mode 100644
index 00000000000..0a966af3804
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sllv_epi32 (x, y);
+ x = _mm512_mask_sllv_epi32 (x, m, x, y);
+ x = _mm512_maskz_sllv_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-2.c
new file mode 100644
index 00000000000..675f4111aee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned int *r, unsigned int *s1, unsigned int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s2[i] < 32 ? (s1[i] << s2[i]) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sllv_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sllv_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sllv_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
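
The variable form applies an independent count per element, and an
out-of-range count zeroes just that lane.  A hypothetical standalone
use (not part of the patch; needs -mavx512f and AVX-512F hardware):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  union { __m512i x; int a[16]; } u;
  __m512i v = _mm512_set1_epi32 (1);
  __m512i c = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
				 8, 9, 10, 11, 12, 13, 14, 40);

  u.x = _mm512_sllv_epi32 (v, c);
  printf ("%d %d %d\n", u.a[0], u.a[1], u.a[15]); /* prints "1 2 0" */
  return 0;
}
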
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-1.c
new file mode 100644
index 00000000000..8faeef02afa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sllv_epi64 (x, y);
+ x = _mm512_mask_sllv_epi64 (x, m, x, y);
+ x = _mm512_maskz_sllv_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-2.c
new file mode 100644
index 00000000000..0b9eb407fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned long long *r, unsigned long long *s1,
+ unsigned long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s2[i] < 64 ? (s1[i] << s2[i]) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sllv_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sllv_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sllv_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-1.c
new file mode 100644
index 00000000000..e93b8c52974
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vpsllvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sllv_epi64 (x, x);
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-2.c
new file mode 100644
index 00000000000..0c970dbb0fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsllvq512-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <string.h>
+#include "avx512f-check.h"
+
+static void
+compute_psllvq512 (long long int *s1, long long int *s2, long long int *r)
+{
+ int i;
+ long long int count;
+
+ for (i = 0; i < 8; ++i)
+ {
+ count = s2[i];
+ r[i] = s1[i] << count;
+ }
+}
+
+void static
+avx512f_test (void)
+{
+ union512i_q s1, s2, res;
+ long long int res_ref[8];
+ int i, j, sign = 1;
+ int fail = 0;
+
+ for (i = 0; i < 10; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * i * sign;
+ s2.a[j] = (j + i) >> 2;
+ sign = -sign;
+ }
+
+ res.x = _mm512_sllv_epi64 (s1.x, s2.x);
+
+ compute_psllvq512 (s1.a, s2.a, res_ref);
+
+ fail += check_union512i_q (res, res_ref);
+ }
+
+ if (fail != 0)
+ abort ();
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-1.c
new file mode 100644
index 00000000000..3d6c5fc13a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sra_epi32 (x, y);
+ x = _mm512_mask_sra_epi32 (x, m, x, y);
+ x = _mm512_maskz_sra_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-2.c
new file mode 100644
index 00000000000..88267f6cb1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrad-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ int count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ count < 32 ? (s1[i] >> count) : (s1[i] < 0 ? 0xFFFFFFFF : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ UNION_TYPE (128, i_d) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ long long imm;
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (imm = 1; imm <= 33; imm++)
+ {
+ src2.a[0] = imm;
+ src2.a[1] = 0;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sra_epi32) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_sra_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sra_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+ }
+}
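
For the arithmetic right shifts, a count of 32 or more replicates the
sign bit across the whole element, so negative inputs give all-ones and
non-negative inputs (including zero) give zero; that is what the
conditional in CALC models.  A scalar sketch (not part of the patch):

#include <stdio.h>

static int
sra32 (int x, long long count)
{
  /* Counts of 32 or more leave only copies of the sign bit.  */
  if (count < 32)
    return x >> count; /* arithmetic shift of negatives on GCC targets */
  return x < 0 ? -1 : 0;
}

int
main (void)
{
  printf ("%d %d %d\n", sra32 (-64, 3), sra32 (-64, 40), sra32 (64, 40));
  /* prints "-8 -1 0" */
  return 0;
}
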
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-1.c
new file mode 100644
index 00000000000..c7bf9385dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srai_epi32 (x, y);
+ x = _mm512_mask_srai_epi32 (x, m, x, y);
+ x = _mm512_maskz_srai_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-2.c
new file mode 100644
index 00000000000..6a9ab5222fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsradi-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ count < 32 ? (s1[i] >> count) : (s1[i] < 0 ? 0xFFFFFFFF : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srai_epi32) (src1.x, 3);
+ res2.x =
+ INTRINSIC (_mask_srai_epi32) (res2.x, mask, src1.x, 3);
+ res3.x = INTRINSIC (_maskz_srai_epi32) (mask, src1.x, 3);
+
+ CALC (res_ref, src1.a, 3);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srai_epi32) (src1.x, 33);
+ res2.x =
+ INTRINSIC (_mask_srai_epi32) (res2.x, mask, src1.x, 33);
+ res3.x = INTRINSIC (_maskz_srai_epi32) (mask, src1.x, 33);
+
+ CALC (res_ref, src1.a, 33);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-1.c
new file mode 100644
index 00000000000..1c7a43db439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sra_epi64 (x, y);
+ x = _mm512_mask_sra_epi64 (x, m, x, y);
+ x = _mm512_maskz_sra_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-2.c
new file mode 100644
index 00000000000..898d1b9b2f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsraq-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ long long count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ count < 64 ? (s1[i] >> count)
+ : (s1[i] < 0 ? 0xFFFFFFFFFFFFFFFFLL : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ UNION_TYPE (128, i_q) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ long long imm;
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (imm = 1; imm <= 65; imm++)
+ {
+ src2.a[0] = imm;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sra_epi64) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_sra_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sra_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-1.c
new file mode 100644
index 00000000000..6400ef4c7d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srai_epi64 (x, y);
+ x = _mm512_mask_srai_epi64 (x, m, x, y);
+ x = _mm512_maskz_srai_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-2.c
new file mode 100644
index 00000000000..cf13170084a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsraqi-2.c
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ count < 64 ? (s1[i] >> count)
+ : (s1[i] < 0 ? 0xFFFFFFFFFFFFFFFFLL : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srai_epi64) (src1.x, 3);
+ res2.x =
+ INTRINSIC (_mask_srai_epi64) (res2.x, mask, src1.x, 3);
+ res3.x = INTRINSIC (_maskz_srai_epi64) (mask, src1.x, 3);
+
+ CALC (res_ref, src1.a, 3);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srai_epi64) (src1.x, 65);
+ res2.x =
+ INTRINSIC (_mask_srai_epi64) (res2.x, mask, src1.x, 65);
+ res3.x = INTRINSIC (_maskz_srai_epi64) (mask, src1.x, 65);
+
+ CALC (res_ref, src1.a, 65);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-1.c
new file mode 100644
index 00000000000..80414c10928
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srav_epi32 (x, y);
+ x = _mm512_mask_srav_epi32 (x, m, x, y);
+ x = _mm512_maskz_srav_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-2.c
new file mode 100644
index 00000000000..d60b38080c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravd-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ s2[i] < 32 ? (s1[i] >> s2[i]) : (s1[i] < 0 ? 0xFFFFFFFF : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srav_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srav_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srav_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-1.c
new file mode 100644
index 00000000000..db6b8dd37af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srav_epi64 (x, y);
+ x = _mm512_mask_srav_epi64 (x, m, x, y);
+ x = _mm512_maskz_srav_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-2.c
new file mode 100644
index 00000000000..c8a931476a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] =
+ s2[i] < 64 ? (s1[i] >> s2[i])
+ : (s1[i] < 0 ? 0xFFFFFFFFFFFFFFFFLL : 0);
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srav_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srav_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srav_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-1.c
new file mode 100644
index 00000000000..318769b1647
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vpsravq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srav_epi64 (x, x);
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-2.c
new file mode 100644
index 00000000000..c2511e9f5ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsravq512-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <string.h>
+#include "avx512f-check.h"
+
+static void
+compute_psravq512 (long long int *s1, long long int *s2, long long int *r)
+{
+ int i;
+ long long int count;
+
+ for (i = 0; i < 8; ++i)
+ {
+ count = s2[i];
+ r[i] = s1[i] >> count;
+ }
+}
+
+void static
+avx512f_test (void)
+{
+ union512i_q s1, s2, res;
+ long long int res_ref[8];
+ int i, j, sign = 1;
+ int fail = 0;
+
+ for (i = 0; i < 10; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * i * sign;
+ s2.a[j] = (j + i) >> 2;
+ sign = -sign;
+ }
+
+ res.x = _mm512_srav_epi64 (s1.x, s2.x);
+
+ compute_psravq512 (s1.a, s2.a, res_ref);
+
+ fail += check_union512i_q (res, res_ref);
+ }
+
+ if (fail != 0)
+ abort ();
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-1.c
new file mode 100644
index 00000000000..7c9ea161080
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srl_epi32 (x, y);
+ x = _mm512_mask_srl_epi32 (x, m, x, y);
+ x = _mm512_maskz_srl_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-2.c
new file mode 100644
index 00000000000..06e73e8f3ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrld-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned int *r, unsigned int *s1, unsigned int *s2)
+{
+ int i;
+ unsigned int count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 32 ? (s1[i] >> count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ UNION_TYPE (128, i_d) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[SIZE];
+
+ unsigned long long imm;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (imm = 1; imm <= 33; imm++)
+ {
+ src2.a[0] = imm;
+ src2.a[1] = 0;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srl_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srl_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srl_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+ }
+}
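
The vpsrl* references differ from the vpsra* ones only in zero-filling:
a logical shift brings in zeros regardless of sign, and an out-of-range
count gives 0 even for negative inputs.  A scalar contrast (not part of
the patch; the unsigned copy makes the logical shift portable):

#include <stdio.h>

int
main (void)
{
  int x = -64;
  unsigned int ux = (unsigned int) x;

  printf ("%u\n", ux >> 3); /* logical: zero-filled, prints "536870904" */
  printf ("%d\n", x >> 3);  /* arithmetic on GCC targets: prints "-8" */
  return 0;
}
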
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-1.c
new file mode 100644
index 00000000000..c21566d1ef1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srli_epi32 (x, y);
+ x = _mm512_mask_srli_epi32 (x, m, x, y);
+ x = _mm512_maskz_srli_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-2.c
new file mode 100644
index 00000000000..190a0f3d178
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrldi-2.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned int *r, unsigned int *s1, unsigned int count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 32 ? (s1[i] >> count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi32) (src1.x, 3);
+ res2.x =
+ INTRINSIC (_mask_srli_epi32) (res2.x, mask, src1.x, 3);
+ res3.x = INTRINSIC (_maskz_srli_epi32) (mask, src1.x, 3);
+
+ CALC (res_ref, src1.a, 3);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi32) (src1.x, 33);
+ res2.x =
+ INTRINSIC (_mask_srli_epi32) (res2.x, mask, src1.x, 33);
+ res3.x = INTRINSIC (_maskz_srli_epi32) (mask, src1.x, 33);
+
+ CALC (res_ref, src1.a, 33);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-1.c
new file mode 100644
index 00000000000..d3af6091f17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srl_epi64 (x, y);
+ x = _mm512_mask_srl_epi64 (x, m, x, y);
+ x = _mm512_maskz_srl_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-2.c
new file mode 100644
index 00000000000..9d380b49d28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlq-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned long long *r, unsigned long long *s1, unsigned long long *s2)
+{
+ int i;
+ unsigned long long count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 64 ? (s1[i] >> count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ UNION_TYPE (128, i_q) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ unsigned long long imm;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (imm = 1; imm <= 65; imm++)
+ {
+ src2.a[0] = imm;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srl_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srl_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srl_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-1.c
new file mode 100644
index 00000000000..b1f6d2766da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srli_epi64 (x, y);
+ x = _mm512_mask_srli_epi64 (x, m, x, y);
+ x = _mm512_maskz_srli_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-2.c
new file mode 100644
index 00000000000..7fc4c776d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlqi-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (unsigned long long *r, unsigned long long *s1,
+ unsigned long long count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 64 ? (s1[i] >> count) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi64) (src1.x, 3);
+ res2.x =
+ INTRINSIC (_mask_srli_epi64) (res2.x, mask, src1.x, 3);
+ res3.x = INTRINSIC (_maskz_srli_epi64) (mask, src1.x, 3);
+
+ CALC (res_ref, src1.a, 3);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+
+
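+ /* Repeat with a count of 65: it exceeds the element width, so
+ every element of the reference result is zero.  */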
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi64) (src1.x, 65);
+ res2.x =
+ INTRINSIC (_mask_srli_epi64) (res2.x, mask, src1.x, 65);
+ res3.x = INTRINSIC (_maskz_srli_epi64) (mask, src1.x, 65);
+
+ CALC (res_ref, src1.a, 65);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-1.c
new file mode 100644
index 00000000000..c8fe74d734e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512i y;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srlv_epi32 (x, y);
+ x = _mm512_mask_srlv_epi32 (x, m, x, y);
+ x = _mm512_maskz_srlv_epi32 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-2.c
new file mode 100644
index 00000000000..ced1fe41491
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
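+/* Reference for vpsrlvd: each element is shifted right logically by
+   its own per-element count; a count of 32 or more clears it.  */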
+void
+CALC (unsigned int *r, unsigned int *s1, unsigned int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s2[i] < 32 ? (s1[i] >> s2[i]) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srlv_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srlv_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srlv_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-1.c
new file mode 100644
index 00000000000..b316f68f59b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m512i y;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srlv_epi64 (x, y);
+ x = _mm512_mask_srlv_epi64 (x, m, x, y);
+ x = _mm512_maskz_srlv_epi64 (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-2.c
new file mode 100644
index 00000000000..ad4dff7c68c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned long long *r, unsigned long long *s1,
+ unsigned long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s2[i] < 64 ? (s1[i] >> s2[i]) : 0;
+ }
+}
+
+void static
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 1 + 17 * i % 71;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srlv_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srlv_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srlv_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-1.c
new file mode 100644
index 00000000000..99b12d200b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vpsrlvq\[ \\t\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_srlv_epi64 (x, x);
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-2.c
new file mode 100644
index 00000000000..d262a83527d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsrlvq512-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <string.h>
+#include "avx512f-check.h"
+
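+/* Scalar reference for vpsrlvq: each quadword is shifted right by
+   the corresponding count; the counts generated below stay under 64.  */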
+static void
+compute_psrlvq512 (long long int *s1, long long int *s2, long long int *r)
+{
+ int i;
+ long long int count;
+
+ for (i = 0; i < 8; ++i)
+ {
+ count = s2[i];
+ r[i] = ((unsigned long long) s1[i]) >> count;
+ }
+}
+
+void static
+avx512f_test (void)
+{
+ union512i_q s1, s2, res;
+ long long int res_ref[8];
+ int i, j, sign = 1;
+ int fail = 0;
+
+ for (i = 0; i < 10; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ s1.a[j] = j * i * sign;
+ s2.a[j] = (j + i) >> 2;
+ sign = -sign;
+ }
+
+ res.x = _mm512_srlv_epi64 (s1.x, s2.x);
+
+ compute_psrlvq512 (s1.a, s2.a, res_ref);
+
+ fail += check_union512i_q (res, res_ref);
+ }
+
+ if (fail != 0)
+ abort ();
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-1.c
new file mode 100644
index 00000000000..28c3584e13d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sub_epi32 (x, x);
+ x = _mm512_mask_sub_epi32 (x, m, x, x);
+ x = _mm512_maskz_sub_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-2.c
new file mode 100644
index 00000000000..5c8df72a010
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsubd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
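+ /* The fractional initializers below are truncated when assigned
+ to the int elements.  */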
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_epi32) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-1.c
new file mode 100644
index 00000000000..c51b291dae8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sub_epi64 (x, x);
+ x = _mm512_mask_sub_epi64 (x, m, x, x);
+ x = _mm512_maskz_sub_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-2.c
new file mode 100644
index 00000000000..36f9b6e8bfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpsubq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_epi64) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c
new file mode 100644
index 00000000000..e4708bf51b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_ternarylogic_epi32 (x, y, z, 0xF0);
+ x = _mm512_mask_ternarylogic_epi32 (x, m, y, z, 0xF0);
+ x = _mm512_maskz_ternarylogic_epi32 (m, x, y, z, 0xF0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-2.c
new file mode 100644
index 00000000000..b06ee150162
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-2.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
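+/* Reference for vpternlogd: bit j of each result element is the bit
+   of the immediate selected by the 3-bit index formed from bit j of
+   the three sources.  */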
+static void
+CALC (int *src1, int *src2, int *src3, int imm, int *r)
+{
+ int i, j, index, res, mask, one_mask = 1;
+ int src1_bit, src2_bit, src3_bit, imm_bit;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res = 0;
+ for (j = 0; j < 32; j++)
+ {
+ mask = one_mask << j;
+ src1_bit = ((src1[i] & mask) >> j) << 2;
+ src2_bit = ((src2[i] & mask) >> j) << 1;
+ src3_bit = ((src3[i] & mask) >> j);
+ index = src1_bit | src2_bit | src3_bit;
+ imm_bit = (imm & (one_mask << index)) >> index;
+ res = res | (imm_bit << j);
+ }
+ r[i] = res;
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) src2, src3, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, imm = 0x7D;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ src2.a[i] = 145132 * i + 123123;
+ src3.a[i] = 1223 * i + 895;
+ }
+
+ CALC (res1.a, src2.a, src3.a, imm, res_ref);
+
+ res1.x = INTRINSIC (_ternarylogic_epi32) (res1.x, src2.x, src3.x,
+ imm);
+ res2.x = INTRINSIC (_mask_ternarylogic_epi32) (res2.x, mask, src2.x,
+ src3.x, imm);
+ res3.x = INTRINSIC (_maskz_ternarylogic_epi32) (mask, res3.x, src2.x,
+ src3.x, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c
new file mode 100644
index 00000000000..7d074668d93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_ternarylogic_epi64 (x, y, z, 0xF0);
+ x = _mm512_mask_ternarylogic_epi64 (x, m, y, z, 0xF0);
+ x = _mm512_maskz_ternarylogic_epi64 (m, x, y, z, 0xF0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-2.c
new file mode 100644
index 00000000000..d903a828426
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *src1, long long *src2, long long *src3,
+ long long imm, long long *r)
+{
+ int i, j;
+ long long res, index, mask, one_mask = 1;
+ long long src1_bit, src2_bit, src3_bit, imm_bit;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res = 0;
+ for (j = 0; j < 64; j++)
+ {
+ mask = one_mask << j;
+ src1_bit = ((src1[i] & mask) >> j) << 2;
+ src2_bit = ((src2[i] & mask) >> j) << 1;
+ src3_bit = ((src3[i] & mask) >> j);
+ index = src1_bit | src2_bit | src3_bit;
+ imm_bit = (imm & (one_mask << index)) >> index;
+ res = res | (imm_bit << j);
+ }
+ r[i] = res;
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src2, src3, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, imm = 0x7D;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ src2.a[i] = 145132 * i + 123123;
+ src3.a[i] = 1223 * i + 895;
+ }
+
+ CALC (res1.a, src2.a, src3.a, imm, res_ref);
+
+ res1.x = INTRINSIC (_ternarylogic_epi64) (res1.x, src2.x, src3.x,
+ imm);
+ res2.x = INTRINSIC (_mask_ternarylogic_epi64) (res2.x, mask, src2.x,
+ src3.x, imm);
+ res3.x = INTRINSIC (_maskz_ternarylogic_epi64) (mask, res3.x, src2.x,
+ src3.x, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-1.c
new file mode 100644
index 00000000000..2242314ce08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vptestmd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vptestmd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m16;
+
+void extern
+avx512f_test (void)
+{
+ m16 = _mm512_test_epi32_mask (x, x);
+ m16 = _mm512_mask_test_epi32_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-2.c
new file mode 100644
index 00000000000..069111e4085
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestmd-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
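+/* Reference for vptestmd: mask bit i is set when the AND of the
+   corresponding source elements is nonzero.  */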
+void
+CALC (MASK_TYPE *res, int *src1, int *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (src1[i] & src2[i])
+ *res = *res | one << i;
+}
+
+static void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_test_epi32_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_test_epi32_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-1.c
new file mode 100644
index 00000000000..9a92903a2bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vptestmq\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vptestmq\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m8;
+
+void extern
+avx512f_test (void)
+{
+ m8 = _mm512_test_epi64_mask (x, x);
+ m8 = _mm512_mask_test_epi64_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-2.c
new file mode 100644
index 00000000000..aa2c84d4d75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestmq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (MASK_TYPE *res, long long *src1, long long *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (src1[i] & src2[i])
+ *res = *res | one << i;
+}
+
+static void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_test_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_test_epi64_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-1.c
new file mode 100644
index 00000000000..1094ee5b0ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vptestnmd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vptestnmd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m16;
+
+void extern
+avx512f_test (void)
+{
+ m16 = _mm512_testn_epi32_mask (x, x);
+ m16 = _mm512_mask_testn_epi32_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-2.c
new file mode 100644
index 00000000000..ebdd0a31329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmd-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
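+/* Reference for vptestnmd: mask bit i is set when the AND of the
+   corresponding source elements is zero.  */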
+void
+CALC (MASK_TYPE *res, int *src1, int *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (!(src1[i] & src2[i]))
+ *res = *res | one << i;
+}
+
+static void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_d) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_testn_epi32_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_testn_epi32_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-1.c
new file mode 100644
index 00000000000..081a25e1738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vptestnmq\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vptestnmq\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m8;
+
+void extern
+avx512f_test (void)
+{
+ m8 = _mm512_testn_epi64_mask (x, x);
+ m8 = _mm512_mask_testn_epi64_mask (3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-2.c
new file mode 100644
index 00000000000..ff4ae2a22c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vptestnmq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (MASK_TYPE *res, long long *src1, long long *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (!(src1[i] & src2[i]))
+ *res = *res | one << i;
+}
+
+static void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_testn_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_testn_epi64_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-1.c
new file mode 100644
index 00000000000..800e1e0ef5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpackhi_epi32 (y, z);
+ x = _mm512_mask_unpackhi_epi32 (x, m, y, z);
+ x = _mm512_maskz_unpackhi_epi32 (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-2.c
new file mode 100644
index 00000000000..bd40fdcdb21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhdq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
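+/* Reference for vpunpckhdq: interleave the high doublewords of the
+   two sources within each 128-bit lane.  */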
+void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE / 4; i++)
+ {
+ r[4 * i] = s1[4 * i + 2];
+ r[4 * i + 1] = s2[4 * i + 2];
+ r[4 * i + 2] = s1[4 * i + 3];
+ r[4 * i + 3] = s2[4 * i + 3];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpackhi_epi32) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_unpackhi_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-1.c
new file mode 100644
index 00000000000..05b22297f8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpackhi_epi64 (y, z);
+ x = _mm512_mask_unpackhi_epi64 (x, m, y, z);
+ x = _mm512_maskz_unpackhi_epi64 (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-2.c
new file mode 100644
index 00000000000..88096c8cd5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckhqdq-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ r[2 * i] = s1[2 * i + 1];
+ r[2 * i + 1] = s2[2 * i + 1];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpackhi_epi64) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_unpackhi_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-1.c
new file mode 100644
index 00000000000..29a2c8dc697
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpacklo_epi32 (y, z);
+ x = _mm512_mask_unpacklo_epi32 (x, m, y, z);
+ x = _mm512_maskz_unpacklo_epi32 (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-2.c
new file mode 100644
index 00000000000..5389c684966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpckldq-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (int *r, int *s1, int *s2)
+{
+ int i;
+ for (i = 0; i < SIZE / 4; i++)
+ {
+ r[4 * i] = s1[4 * i];
+ r[4 * i + 1] = s2[4 * i];
+ r[4 * i + 2] = s1[4 * i + 1];
+ r[4 * i + 3] = s2[4 * i + 1];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpacklo_epi32) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_unpacklo_epi32) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpacklo_epi32) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-1.c
new file mode 100644
index 00000000000..ac6f2976ade
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpacklo_epi64 (y, z);
+ x = _mm512_mask_unpacklo_epi64 (x, m, y, z);
+ x = _mm512_maskz_unpacklo_epi64 (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-2.c
new file mode 100644
index 00000000000..ae093e20d9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpunpcklqdq-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
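+/* Reference for vpunpcklqdq: interleave the low quadwords of the two
+   sources within each 128-bit lane.  */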
+void
+CALC (long long *r, long long *s1, long long *s2)
+{
+ int i;
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ r[2 * i] = s1[2 * i];
+ r[2 * i + 1] = s2[2 * i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpacklo_epi64) (src1.x, src2.x);
+ res2.x =
+ INTRINSIC (_mask_unpacklo_epi64) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpacklo_epi64) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxord-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-1.c
new file mode 100644
index 00000000000..99e82bef459
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_xor_si512 (x, x);
+ x = _mm512_xor_epi32 (x, x);
+ x = _mm512_mask_xor_epi32 (x, m, x, x);
+ x = _mm512_maskz_xor_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c
new file mode 100644
index 00000000000..1505e1659d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
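+/* _mm512_xor_si512 and _mm512_xor_epi32 compute the same bitwise
+   XOR, so both results are checked against one reference.  */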
+static void
+CALC (int *s1, int *s2, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] ^ s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, res4;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res3.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_xor_si512) (s1.x, s2.x);
+ res2.x = INTRINSIC (_xor_epi32) (s1.x, s2.x);
+ res3.x = INTRINSIC (_mask_xor_epi32) (res3.x, mask, s1.x, s2.x);
+ res4.x = INTRINSIC (_maskz_xor_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-1.c
new file mode 100644
index 00000000000..cd2853409e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_xor_epi64 (x, x);
+ x = _mm512_mask_xor_epi64 (x, m, x, x);
+ x = _mm512_maskz_xor_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c
new file mode 100644
index 00000000000..b095491437c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s1, long long *s2, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = s1[i] ^ s2[i];
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_xor_epi64) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_xor_epi64) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_xor_epi64) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-1.c
new file mode 100644
index 00000000000..7342420489d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rcp14_pd (x);
+ x = _mm512_mask_rcp14_pd (x, m, x);
+ x = _mm512_maskz_rcp14_pd (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-2.c
new file mode 100644
index 00000000000..505e095af53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14pd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
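+/* vrcp14pd only approximates the reciprocal (relative error up to
+   2^-14), so results are compared with 1.0 / s under a tolerance.  */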
+static void
+CALC (double *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 1.0 / s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_rcp14_pd) (s.x);
+ res2.x = INTRINSIC (_mask_rcp14_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_rcp14_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-1.c
new file mode 100644
index 00000000000..ea6c68de7ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rcp14_ps (x);
+ x = _mm512_mask_rcp14_ps (x, m, x);
+ x = _mm512_maskz_rcp14_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-2.c
new file mode 100644
index 00000000000..7af79139468
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 1.0 / s[i];
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_rcp14_ps) (s.x);
+ res2.x = INTRINSIC (_mask_rcp14_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_rcp14_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
new file mode 100644
index 00000000000..f2f8e3f908f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_rcp14_sd (x1, x2);
+ x1 = _mm_mask_rcp14_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_rcp14_sd (m, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
new file mode 100644
index 00000000000..915b0b00190
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
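+/* The scalar form puts the approximate reciprocal of s2[0] in the
+   low element and copies the upper element from s1.  */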
+static void
+compute_vrcp14sd (double *s1, double *s2, double *r)
+{
+ r[0] = 1.0 / s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, -2.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_rcp14_sd (s1.x, s2.x);
+ res2.x = _mm_mask_rcp14_sd (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_rcp14_sd (mask, s1.x, s2.x);
+
+ compute_vrcp14sd (s1.a, s2.a, res_ref);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
new file mode 100644
index 00000000000..e7fb821a4f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_rcp14_ss (x1, x2);
+ x1 = _mm_mask_rcp14_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_rcp14_ss (m, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
new file mode 100644
index 00000000000..1557a623b72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrcp14ss (float *s1, float *s2, float *r)
+{
+ r[0] = 1.0 / s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_rcp14_ss (s1.x, s2.x);
+ res2.x = _mm_mask_rcp14_ss (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_rcp14_ss (mask, s1.x, s2.x);
+
+ compute_vrcp14ss (s1.a, s2.a, res_ref);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-1.c
new file mode 100644
index 00000000000..3fb0e090ed7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 6} } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 9} } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 3} } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 6} } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_roundscale_pd (x, 0x42);
+ x = _mm512_ceil_pd (x);
+ x = _mm512_floor_pd (x);
+ x = _mm512_mask_roundscale_pd (x, 2, x, 0x42);
+ x = _mm512_mask_ceil_pd (x, 2, x);
+ x = _mm512_mask_floor_pd (x, 2, x);
+ x = _mm512_maskz_roundscale_pd (2, x, 0x42);
+ x = _mm512_maskz_ceil_pd (2, x);
+ x = _mm512_maskz_floor_pd (2, x);
+
+ x = _mm512_roundscale_round_pd (x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_ceil_round_pd (x, _MM_FROUND_NO_EXC);
+ x = _mm512_floor_round_pd (x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_roundscale_round_pd (x, 2, x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_ceil_round_pd (x, 2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_floor_round_pd (x, 2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_roundscale_round_pd (2, x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_ceil_round_pd (2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_floor_round_pd (2, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-2.c
new file mode 100644
index 00000000000..c46a0fc9449
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalepd-2.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "math.h"
+
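+/* Reference for vrndscalepd: the low four immediate bits select the
+   rounding mode, the next four give M; values are rounded to 2^-M
+   precision.  */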
+static void
+CALC (double *s, double *r, int imm)
+{
+ int i = 0, rc, m;
+ rc = imm & 0xf;
+ m = imm >> 4;
+ for (i = 0; i < SIZE; i++)
+ switch (rc)
+ {
+ case _MM_FROUND_FLOOR:
+ r[i] = floor (s[i] * pow (2, m)) / pow (2, m);
+ break;
+ case _MM_FROUND_CEIL:
+ r[i] = ceil (s[i] * pow (2, m)) / pow (2, m);
+ break;
+ default:
+ abort ();
+ break;
+ }
+}
+
+void static
+TEST (void)
+{
+ int imm, i, j;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, s;
+ double res_ref[SIZE];
+
+ MASK_TYPE mask = 6 ^ (0xff >> SIZE);
+
+ imm = _MM_FROUND_FLOOR | (7 << 4);
+
+ for (i = 0; i < 3; i++)
+ {
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s.a[j] = j * (j + 12.0231);
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ switch (i)
+ {
+ case 0:
+ imm = _MM_FROUND_FLOOR | (7 << 4);
+ res1.x = INTRINSIC (_roundscale_pd) (s.x, imm);
+ res2.x = INTRINSIC (_mask_roundscale_pd) (res2.x, mask, s.x, imm);
+ res3.x = INTRINSIC (_maskz_roundscale_pd) (mask, s.x, imm);
+ break;
+ case 1:
+ imm = _MM_FROUND_FLOOR;
+ res1.x = INTRINSIC (_floor_pd) (s.x);
+ res2.x = INTRINSIC (_mask_floor_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_floor_pd) (mask, s.x);
+ break;
+ case 2:
+ imm = _MM_FROUND_CEIL;
+ res1.x = INTRINSIC (_ceil_pd) (s.x);
+ res2.x = INTRINSIC (_mask_ceil_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_ceil_pd) (mask, s.x);
+ break;
+ }
+
+ CALC (s.a, res_ref, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+
+ }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-1.c
new file mode 100644
index 00000000000..7d5aeaac8ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6} } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 9} } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 3} } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 6} } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_roundscale_ps (x, 0x42);
+ x = _mm512_ceil_ps (x);
+ x = _mm512_floor_ps (x);
+ x = _mm512_mask_roundscale_ps (x, 2, x, 0x42);
+ x = _mm512_mask_ceil_ps (x, 2, x);
+ x = _mm512_mask_floor_ps (x, 2, x);
+ x = _mm512_maskz_roundscale_ps (2, x, 0x42);
+ x = _mm512_maskz_ceil_ps (2, x);
+ x = _mm512_maskz_floor_ps (2, x);
+
+ x = _mm512_roundscale_round_ps (x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_ceil_round_ps (x, _MM_FROUND_NO_EXC);
+ x = _mm512_floor_round_ps (x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_roundscale_round_ps (x, 2, x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_ceil_round_ps (x, 2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_mask_floor_round_ps (x, 2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_roundscale_round_ps (2, x, 0x42, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_ceil_round_ps (2, x, _MM_FROUND_NO_EXC);
+ x = _mm512_maskz_floor_round_ps (2, x, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c
new file mode 100644
index 00000000000..5aae5ab9b15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c
@@ -0,0 +1,92 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "math.h"
+
+static void
+CALC (float *s, float *r, int imm)
+{
+ int i = 0, rc, m;
+ rc = imm & 0xf;
+ m = imm >> 4;
+ for (i = 0; i < SIZE; i++)
+ switch (rc)
+ {
+ case _MM_FROUND_FLOOR:
+ r[i] = floor (s[i] * pow (2, m)) / pow (2, m);
+ break;
+ case _MM_FROUND_CEIL:
+ r[i] = ceil (s[i] * pow (2, m)) / pow (2, m);
+ break;
+ default:
+ abort ();
+ break;
+ }
+}
+
+void static
+TEST (void)
+{
+ int imm, i, j;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, s;
+ float res_ref[SIZE];
+
+ MASK_TYPE mask = 6 ^ (0xffff >> SIZE);
+
+ imm = _MM_FROUND_FLOOR | (7 << 4);
+
+ for (i = 0; i < 3; i++)
+ {
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s.a[j] = j * (j + 12.0231);
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ switch (i)
+ {
+ case 0:
+ imm = _MM_FROUND_FLOOR | (7 << 4);
+ res1.x = INTRINSIC (_roundscale_ps) (s.x, imm);
+ res2.x = INTRINSIC (_mask_roundscale_ps) (res2.x, mask, s.x, imm);
+ res3.x = INTRINSIC (_maskz_roundscale_ps) (mask, s.x, imm);
+ break;
+ case 1:
+ imm = _MM_FROUND_FLOOR;
+ res1.x = INTRINSIC (_floor_ps) (s.x);
+ res2.x = INTRINSIC (_mask_floor_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_floor_ps) (mask, s.x);
+ break;
+ case 2:
+ imm = _MM_FROUND_CEIL;
+ res1.x = INTRINSIC (_ceil_ps) (s.x);
+ res2.x = INTRINSIC (_mask_ceil_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_ceil_ps) (mask, s.x);
+ break;
+ }
+
+ CALC (s.a, res_ref, imm);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c
new file mode 100644
index 00000000000..da0d110dd93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_roundscale_sd (x1, x2, 0x42);
+ x1 = _mm_mask_roundscale_sd (x1, m, x1, x2, 0x42);
+ x1 = _mm_maskz_roundscale_sd (m, x1, x2, 0x42);
+
+ x1 = _mm_roundscale_round_sd (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_roundscale_round_sd (x1, m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_roundscale_round_sd (m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c
new file mode 100644
index 00000000000..9cc1cab3d6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscalesd-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
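+/* vrndscalesd rounds only element 0, taken from the second source;
+   the upper element passes through unchanged from the first source.  */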
+static void
+compute_rndscalesd (double *s1, double *s2, double *r, int imm)
+{
+ int rc, m;
+ rc = imm & 0xf;
+ m = imm >> 4;
+
+ switch (rc)
+ {
+ case _MM_FROUND_FLOOR:
+ r[0] = floor (s2[0] * pow (2, m)) / pow (2, m);
+ break;
+ case _MM_FROUND_CEIL:
+ r[0] = ceil (s2[0] * pow (2, m)) / pow (2, m);
+ break;
+ default:
+ abort ();
+ break;
+ }
+
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ int imm = _MM_FROUND_FLOOR | (7 << 4);
+ union128d s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ s1.x = _mm_set_pd (4.05084, -1.23162);
+  s2.x = _mm_set_pd (-3.53222, 7.33527);
+  res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_roundscale_sd (s1.x, s2.x, imm);
+ res2.x = _mm_mask_roundscale_sd (res2.x, mask, s1.x, s2.x, imm);
+ res3.x = _mm_maskz_roundscale_sd (mask, s1.x, s2.x, imm);
+
+ compute_rndscalesd (s1.a, s2.a, res_ref, imm);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c
new file mode 100644
index 00000000000..a797b18bf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_roundscale_ss (x1, x2, 0x42);
+ x1 = _mm_mask_roundscale_ss (x1, m, x1, x2, 0x42);
+ x1 = _mm_maskz_roundscale_ss (m, x1, x2, 0x42);
+
+ x1 = _mm_roundscale_round_ss (x1, x2, 0x42, _MM_FROUND_NO_EXC);
+ x1 = _mm_mask_roundscale_round_ss (x1, m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+ x1 = _mm_maskz_roundscale_round_ss (m, x1, x2, 0x42, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c
new file mode 100644
index 00000000000..154e70e647c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaless-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_rndscaless (float *s1, float *s2, float *r, int imm)
+{
+ int rc, m;
+ rc = imm & 0xf;
+ m = imm >> 4;
+
+ switch (rc)
+ {
+ case _MM_FROUND_FLOOR:
+ r[0] = floorf (s2[0] * pow (2, m)) / pow (2, m);
+ break;
+ case _MM_FROUND_CEIL:
+ r[0] = ceilf (s2[0] * pow (2, m)) / pow (2, m);
+ break;
+ default:
+ abort ();
+ break;
+ }
+
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ int imm = _MM_FROUND_FLOOR | (7 << 4);
+ union128 s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ s1.x = _mm_set_ps (4.05084, -1.23162, 2.00231, -6.22103);
+  s2.x = _mm_set_ps (-4.19319, -3.53222, 7.33527, 5.57655);
+  res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_roundscale_ss (s1.x, s2.x, imm);
+ res2.x = _mm_mask_roundscale_ss (res2.x, mask, s1.x, s2.x, imm);
+ res3.x = _mm_maskz_roundscale_ss (mask, s1.x, s2.x, imm);
+
+ compute_rndscaless (s1.a, s2.a, res_ref, imm);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-1.c
new file mode 100644
index 00000000000..e8818a6b630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rsqrt14_pd (x);
+ x = _mm512_mask_rsqrt14_pd (x, m, x);
+ x = _mm512_maskz_rsqrt14_pd (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-2.c
new file mode 100644
index 00000000000..304025f2efc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14pd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
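+/* vrsqrt14 guarantees a relative error of at most 2^-14, so the
+   1.0 / sqrt reference is compared with a rough tolerance below.  */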
+static void
+CALC (double *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 1.0 / sqrt(s[i]);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rsqrt14_pd) (s.x);
+ res2.x = INTRINSIC (_mask_rsqrt14_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_rsqrt14_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-1.c
new file mode 100644
index 00000000000..b766d85418b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_rsqrt14_ps (x);
+ x = _mm512_mask_rsqrt14_ps (x, m, x);
+ x = _mm512_maskz_rsqrt14_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-2.c
new file mode 100644
index 00000000000..f794c8bfbd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = 1.0 / sqrt(s[i]);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_rsqrt14_ps) (s.x);
+ res2.x = INTRINSIC (_mask_rsqrt14_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_rsqrt14_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res1, res_ref, 0.0001))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res2, res_ref, 0.0001))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_ROUGH_CHECK (AVX512F_LEN,) (res3, res_ref, 0.0001))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
new file mode 100644
index 00000000000..a41280cd60f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_rsqrt14_sd (x1, x2);
+ x1 = _mm_mask_rsqrt14_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_sd (m, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
new file mode 100644
index 00000000000..a4bfe0da4f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14sd-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrsqrt14sd (double *s1, double *s2, double *r)
+{
+ r[0] = 1.0 / sqrt (s2[0]);
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, 4.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_rsqrt14_sd (s1.x, s2.x);
+ res2.x = _mm_mask_rsqrt14_sd (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_rsqrt14_sd (mask, s1.x, s2.x);
+
+ compute_vrsqrt14sd (s1.a, s2.a, res_ref);
+
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
new file mode 100644
index 00000000000..2827efe8b97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_rsqrt14_ss (x1, x2);
+ x1 = _mm_mask_rsqrt14_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_ss (m, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
new file mode 100644
index 00000000000..8147749162d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrsqrt14ss-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vrsqrt14ss (float *s1, float *s2, float *r)
+{
+ r[0] = 1.0 / sqrt (s2[0]);
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2, res3;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.222, 333.333, 444.444, 4.0);
+ res2.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_rsqrt14_ss (s1.x, s2.x);
+ res2.x = _mm_mask_rsqrt14_ss (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_rsqrt14_ss (mask, s1.x, s2.x);
+
+ compute_vrsqrt14ss (s1.a, s2.a, res_ref);
+
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-1.c
new file mode 100644
index 00000000000..6076e2c1b6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_scalef_pd (x, x);
+ x = _mm512_mask_scalef_pd (x, m, x, x);
+ x = _mm512_maskz_scalef_pd (m, x, x);
+ x = _mm512_scalef_round_pd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_scalef_round_pd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_scalef_round_pd (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c
new file mode 100644
index 00000000000..a6fdc4baf6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include <math.h>
+
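+/* vscalef computes s1 * 2^floor(s2); ldexp reproduces this once the
+   exponent argument has been floored.  */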
+static void
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = ldexp (s1[i], floor (s2[i]));
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_scalef_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_scalef_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_scalef_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-1.c
new file mode 100644
index 00000000000..37385f56802
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_scalef_ps (x, x);
+ x = _mm512_mask_scalef_ps (x, m, x, x);
+ x = _mm512_maskz_scalef_ps (m, x, x);
+ x = _mm512_scalef_round_ps (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_scalef_round_ps (x, m, x, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_scalef_round_ps (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-2.c
new file mode 100644
index 00000000000..92e6f606f34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefps-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include <math.h>
+
+static void
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = ldexp (s1[i], floor (s2[i]));
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_scalef_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_scalef_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_scalef_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
new file mode 100644
index 00000000000..26a254199ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_scalef_sd (x, x);
+ x = _mm_mask_scalef_sd (x, m, x, x);
+ x = _mm_maskz_scalef_sd (m, x, x);
+ x = _mm_scalef_round_sd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm_mask_scalef_round_sd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm_maskz_scalef_round_sd (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
new file mode 100644
index 00000000000..83f623e0b21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_scalefsd (double *s1, double *s2, double *r)
+{
+ r[0] = s1[0] * pow (2, floor (s2[0]));
+ r[1] = s1[1];
+}
+
+void static
+avx512f_test (void)
+{
+ union128d res1, res2, res3, s1, s2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 11.5 * (i + 1);
+ s2.a[i] = 10.5 * (i + 1);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm_scalef_sd (s1.x, s2.x);
+ res2.x = _mm_mask_scalef_sd (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_scalef_sd (mask, s1.x, s2.x);
+
+ compute_scalefsd (s1.a, s2.a, res_ref);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
new file mode 100644
index 00000000000..9da2cd3dcff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm_scalef_ss (x, x);
+ x = _mm_mask_scalef_ss (x, m, x, x);
+ x = _mm_maskz_scalef_ss (m, x, x);
+ x = _mm_scalef_round_ss (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm_mask_scalef_round_ss (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm_maskz_scalef_round_ss (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
new file mode 100644
index 00000000000..e49e24967a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_scalefss (float *s1, float *s2, float *r)
+{
+ r[0] = s1[0] * (float) pow (2, floor (s2[0]));
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 res1, res2, res3, s1, s2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 11.5 * (i + 1);
+ s2.a[i] = 10.5 * (i + 1);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = _mm_scalef_ss (s1.x, s2.x);
+ res2.x = _mm_mask_scalef_ss (res2.x, mask, s1.x, s2.x);
+ res3.x = _mm_maskz_scalef_ss (mask, s1.x, s2.x);
+
+ compute_scalefss (s1.a, s2.a, res_ref);
+
+ if (check_union128 (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_union128 (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_union128 (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-1.c
new file mode 100644
index 00000000000..712b3148297
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+__m512 x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_f32x4 (x, x, 56);
+ x = _mm512_mask_shuffle_f32x4 (x, 4, x, x, 56);
+ x = _mm512_maskz_shuffle_f32x4 (6, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c
new file mode 100644
index 00000000000..da76733e3f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
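+/* Each 128-bit lane of the result comes from S1 (low half of the
+   destination) or S2 (high half); IMM supplies a 2-bit selector per
+   lane for 512-bit vectors and a 1-bit selector for 256-bit ones.  */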
+void
+CALC (float *e, UNION_TYPE (AVX512F_LEN,) s1, UNION_TYPE (AVX512F_LEN,) s2,
+ int imm)
+{
+ int i, offset, selector;
+ float *source;
+ for (i = 0; i < SIZE / 4; i++)
+ {
+
+#if AVX512F_LEN == 512
+ selector = (imm >> i * 2) & 0x3;
+#else
+ selector = (imm >> i) & 0x1;
+#endif
+
+ offset = i * 4;
+ source = i * 4 * 32 < AVX512F_LEN / 2 ? s1.a : s2.a;
+ memcpy (e + offset, source + selector * 4, 16);
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) u1, u2, u3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ float e[SIZE];
+ int i;
+ int imm = 203;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      s1.a[i] = 1.2 / (i + 0.378);
+      s2.a[i] = 91.02 / (i + 4.3578);
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_shuffle_f32x4) (s1.x, s2.x, imm);
+ u2.x = INTRINSIC (_mask_shuffle_f32x4) (u2.x, mask, s1.x, s2.x, imm);
+ u3.x = INTRINSIC (_maskz_shuffle_f32x4) (mask, s1.x, s2.x, imm);
+
+ CALC (e, s1, s2, imm);
+
+ if (UNION_CHECK (AVX512F_LEN,) (u1, e))
+ abort ();
+
+ MASK_MERGE ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (u2, e))
+ abort ();
+
+ MASK_ZERO ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-1.c
new file mode 100644
index 00000000000..c5ac373cc8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+__m512d x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_f64x2 (x, x, 56);
+ x = _mm512_maskz_shuffle_f64x2 (3, x, x, 56);
+ x = _mm512_mask_shuffle_f64x2 (x, 3, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c
new file mode 100644
index 00000000000..94dd4f21a9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (double *e, UNION_TYPE (AVX512F_LEN, d) s1,
+ UNION_TYPE (AVX512F_LEN, d) s2, int imm)
+{
+ int i, offset, selector;
+ double *source;
+ for (i = 0; i < SIZE / 2; i++)
+ {
+
+#if AVX512F_LEN == 512
+ selector = (imm >> i * 2) & 0x3;
+#else
+ selector = (imm >> i) & 0x1;
+#endif
+
+ offset = i * 2;
+ source = i * 2 * 64 < AVX512F_LEN / 2 ? s1.a : s2.a;
+ memcpy (e + offset, source + selector * 2, 16);
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) u1, u2, u3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ double e[SIZE];
+ int i;
+ int imm = 203;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      s1.a[i] = 1.2 / (i + 0.378);
+      s2.a[i] = 91.02 / (i + 4.3578);
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_shuffle_f64x2) (s1.x, s2.x, imm);
+ u2.x = INTRINSIC (_mask_shuffle_f64x2) (u2.x, mask, s1.x, s2.x, imm);
+ u3.x = INTRINSIC (_maskz_shuffle_f64x2) (mask, s1.x, s2.x, imm);
+
+ CALC (e, s1, s2, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (u1, e))
+ abort ();
+
+ MASK_MERGE (d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (u2, e))
+ abort ();
+
+ MASK_ZERO (d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-1.c
new file mode 100644
index 00000000000..8e48fdf7ddc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+__m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_i32x4 (x, x, 56);
+ x = _mm512_mask_shuffle_i32x4 (x, 8, x, x, 56);
+ x = _mm512_maskz_shuffle_i32x4 (8, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c
new file mode 100644
index 00000000000..3081800dca8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (int *e, UNION_TYPE (AVX512F_LEN, i_d) s1,
+ UNION_TYPE (AVX512F_LEN, i_d) s2, int imm)
+{
+ int i, offset, selector;
+ int *source;
+ for (i = 0; i < SIZE / 4; i++)
+ {
+
+#if AVX512F_LEN == 512
+ selector = (imm >> i * 2) & 0x3;
+#else
+ selector = (imm >> i) & 0x1;
+#endif
+
+ offset = i * 4;
+ source = i * 4 * 32 < AVX512F_LEN / 2 ? s1.a : s2.a;
+ memcpy (e + offset, source + selector * 4, 16);
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) u1, u2, u3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ int e[SIZE];
+ int i;
+ int imm = 203;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      s1.a[i] = 1.2 / (i + 0.378);
+      s2.a[i] = 91.02 / (i + 4.3578);
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_shuffle_i32x4) (s1.x, s2.x, imm);
+ u2.x = INTRINSIC (_mask_shuffle_i32x4) (u2.x, mask, s1.x, s2.x, imm);
+ u3.x = INTRINSIC (_maskz_shuffle_i32x4) (mask, s1.x, s2.x, imm);
+
+ CALC (e, s1, s2, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (u1, e))
+ abort ();
+
+ MASK_MERGE (i_d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (u2, e))
+ abort ();
+
+ MASK_ZERO (i_d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-1.c
new file mode 100644
index 00000000000..5bb5c8f63f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+__m512i x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_i64x2 (x, x, 56);
+ x = _mm512_mask_shuffle_i64x2 (x, 3, x, x, 56);
+ x = _mm512_maskz_shuffle_i64x2 (2, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c
new file mode 100644
index 00000000000..fa09c882549
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (long long *e, UNION_TYPE (AVX512F_LEN, i_q) s1,
+ UNION_TYPE (AVX512F_LEN, i_q) s2, int imm)
+{
+ int i, offset, selector;
+ long long *source;
+ for (i = 0; i < SIZE / 2; i++)
+ {
+
+#if AVX512F_LEN == 512
+ selector = (imm >> i * 2) & 0x3;
+#else
+ selector = (imm >> i) & 0x1;
+#endif
+
+ offset = i * 2;
+ source = i * 2 * 64 < AVX512F_LEN / 2 ? s1.a : s2.a;
+ memcpy (e + offset, source + selector * 2, 16);
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) u1, u2, u3, s1, s2;
+ MASK_TYPE mask = MASK_VALUE;
+ long long e[SIZE];
+ int i;
+ int imm = 203;
+
+ for (i = 0; i < SIZE; i++)
+ {
+      s1.a[i] = 1.2 / (i + 0.378);
+      s2.a[i] = 91.02 / (i + 4.3578);
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_shuffle_i64x2) (s1.x, s2.x, imm);
+ u2.x = INTRINSIC (_mask_shuffle_i64x2) (u2.x, mask, s1.x, s2.x, imm);
+ u3.x = INTRINSIC (_maskz_shuffle_i64x2) (mask, s1.x, s2.x, imm);
+
+ CALC (e, s1, s2, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (u1, e))
+ abort ();
+
+ MASK_MERGE (i_q) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (u2, e))
+ abort ();
+
+ MASK_ZERO (i_q) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c
new file mode 100644
index 00000000000..420a6cfd7be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+__m512d x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_pd (x, x, 56);
+ x = _mm512_mask_shuffle_pd (x, 2, x, x, 56);
+ x = _mm512_maskz_shuffle_pd (2, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-2.c
new file mode 100644
index 00000000000..107db541f29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+void static
+CALC (double *e, UNION_TYPE (AVX512F_LEN, d) s1,
+ UNION_TYPE (AVX512F_LEN, d) s2, int imm)
+{
+ e[0] = (imm & (1 << 0)) ? s1.a[1] : s1.a[0];
+ e[1] = (imm & (1 << 1)) ? s2.a[1] : s2.a[0];
+#if AVX512F_LEN > 128
+ e[2] = (imm & (1 << 2)) ? s1.a[3] : s1.a[2];
+ e[3] = (imm & (1 << 3)) ? s2.a[3] : s2.a[2];
+#if AVX512F_LEN > 256
+ e[4] = (imm & (1 << 4)) ? s1.a[5] : s1.a[4];
+ e[5] = (imm & (1 << 5)) ? s2.a[5] : s2.a[4];
+ e[6] = (imm & (1 << 6)) ? s1.a[7] : s1.a[6];
+ e[7] = (imm & (1 << 7)) ? s2.a[7] : s2.a[6];
+#endif
+#endif
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) u1, u2, u3, s1, s2;
+ double e[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 2134.3343 * i + 54846.4641;
+ s2.a[i] = 856.43576 * i + 1124.209;
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_shuffle_pd) (s1.x, s2.x, 120);
+ u2.x = INTRINSIC (_mask_shuffle_pd) (u2.x, mask, s1.x, s2.x, 120);
+ u3.x = INTRINSIC (_maskz_shuffle_pd) (mask, s1.x, s2.x, 120);
+ CALC (e, s1, s2, 120);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (u1, e))
+ abort ();
+
+ MASK_MERGE (d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (u2, e))
+ abort ();
+
+ MASK_ZERO (d) (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c
new file mode 100644
index 00000000000..e3dbf0751f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+__m512 x;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_shuffle_ps (x, x, 56);
+ x = _mm512_mask_shuffle_ps (x, 2, x, x, 56);
+ x = _mm512_maskz_shuffle_ps (2, x, x, 56);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-2.c
new file mode 100644
index 00000000000..ef4cc5f5ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
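+/* IMM holds four 2-bit selectors applied within each 128-bit lane:
+   the low two pick elements from S1, the high two from S2.  */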
+void
+CALC (float *e, UNION_TYPE (AVX512F_LEN,) s1, UNION_TYPE (AVX512F_LEN,) s2,
+ int imm)
+{
+ e[0] = s1.a[(imm >> 0) & 0x3];
+ e[1] = s1.a[(imm >> 2) & 0x3];
+ e[2] = s2.a[(imm >> 4) & 0x3];
+ e[3] = s2.a[(imm >> 6) & 0x3];
+#if AVX512F_LEN > 128
+ e[4] = s1.a[4 + ((imm >> 0) & 0x3)];
+ e[5] = s1.a[4 + ((imm >> 2) & 0x3)];
+ e[6] = s2.a[4 + ((imm >> 4) & 0x3)];
+ e[7] = s2.a[4 + ((imm >> 6) & 0x3)];
+#if AVX512F_LEN > 256
+ e[8] = s1.a[8 + ((imm >> 0) & 0x3)];
+ e[9] = s1.a[8 + ((imm >> 2) & 0x3)];
+ e[10] = s2.a[8 + ((imm >> 4) & 0x3)];
+ e[11] = s2.a[8 + ((imm >> 6) & 0x3)];
+ e[12] = s1.a[12 + ((imm >> 0) & 0x3)];
+ e[13] = s1.a[12 + ((imm >> 2) & 0x3)];
+ e[14] = s2.a[12 + ((imm >> 4) & 0x3)];
+ e[15] = s2.a[12 + ((imm >> 6) & 0x3)];
+#endif
+#endif
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) u1, u2, u3, s1, s2;
+ float e[SIZE];
+ int i, sign;
+ MASK_TYPE mask = MASK_VALUE;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 1.5 + 34.67 * i * sign;
+ s2.a[i] = -22.17 * i * sign;
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ sign = sign * -1;
+ }
+
+
+ u1.x = INTRINSIC (_shuffle_ps) (s1.x, s2.x, 203);
+ u2.x = INTRINSIC (_mask_shuffle_ps) (u2.x, mask, s1.x, s2.x, 203);
+ u3.x = INTRINSIC (_maskz_shuffle_ps) (mask, s1.x, s2.x, 203);
+
+ CALC (e, s1, s2, 203);
+
+ if (UNION_CHECK (AVX512F_LEN,) (u1, e))
+ abort ();
+
+ MASK_MERGE ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (u2, e))
+ abort ();
+
+ MASK_ZERO ()(e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-1.c
new file mode 100644
index 00000000000..8b5a3d4cbcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sqrt_pd (x);
+ x = _mm512_mask_sqrt_pd (x, m, x);
+ x = _mm512_maskz_sqrt_pd (m, x);
+ x = _mm512_sqrt_round_pd (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_sqrt_round_pd (x, m, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_sqrt_round_pd (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-2.c
new file mode 100644
index 00000000000..e3832588dde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtpd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = sqrt(s[i]);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_sqrt_pd) (s.x);
+ res2.x = INTRINSIC (_mask_sqrt_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_sqrt_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-1.c
new file mode 100644
index 00000000000..f4fdf5590f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sqrt_ps (x);
+ x = _mm512_mask_sqrt_ps (x, m, x);
+ x = _mm512_maskz_sqrt_ps (m, x);
+ x = _mm512_sqrt_round_ps (x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_sqrt_round_ps (x, m, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_sqrt_round_ps (m, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-2.c
new file mode 100644
index 00000000000..aa085568724
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = sqrt(s[i]);
+ }
+}
+
+static void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_sqrt_ps) (s.x);
+ res2.x = INTRINSIC (_mask_sqrt_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_sqrt_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c
new file mode 100644
index 00000000000..ced8d126290
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_sqrt_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_sqrt_sd (m, x1, x2);
+ x1 = _mm_sqrt_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_sqrt_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_sqrt_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c
new file mode 100644
index 00000000000..29bf55d6405
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtsd-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vsqrtsd (double *s1, double *s2, double *r)
+{
+ r[0] = sqrt (s2[0]);
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, 4.0);
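+  /* Element 0 of the merge destination must hold DEFAULT_VALUE so
+     that the MASK_MERGE reference matches when the mask bit is
+     clear.  */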
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_sqrt_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_sqrt_sd (mask, s1.x, s2.x);
+
+ compute_vsqrtsd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c
new file mode 100644
index 00000000000..555f15db8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_sqrt_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_sqrt_ss (m, x1, x2);
+ x1 = _mm_sqrt_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_sqrt_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_sqrt_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c
new file mode 100644
index 00000000000..16d613ebe82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsqrtss-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include <math.h>
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vsqrtss (float *s1, float *s2, float *r)
+{
+ r[0] = sqrt (s2[0]);
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.222, 333.333, 444.444, 4.0);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_sqrt_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_sqrt_ss (mask, s1.x, s2.x);
+
+ compute_vsqrtss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-1.c
new file mode 100644
index 00000000000..47a78c34047
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sub_pd (x, x);
+ x = _mm512_mask_sub_pd (x, m, x, x);
+ x = _mm512_maskz_sub_pd (m, x, x);
+ x = _mm512_sub_round_pd (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_sub_round_pd (x, m, x, x, _MM_FROUND_TO_NEG_INF);
+ x = _mm512_maskz_sub_round_pd (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-2.c
new file mode 100644
index 00000000000..708afb9a529
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *r, double *s1, double *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_pd) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_pd) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_pd) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubps-1.c
new file mode 100644
index 00000000000..6d2db1e67d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_sub_ps (x, x);
+ x = _mm512_mask_sub_ps (x, m, x, x);
+ x = _mm512_maskz_sub_ps (m, x, x);
+ x = _mm512_sub_round_ps (x, x, _MM_FROUND_TO_NEAREST_INT);
+ x = _mm512_mask_sub_round_ps (x, m, x, x, _MM_FROUND_TO_POS_INF);
+ x = _mm512_maskz_sub_round_ps (m, x, x, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubps-2.c
new file mode 100644
index 00000000000..462aa64abba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *r, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void static
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 1.5 + 34.67 * i * sign;
+ src2.a[i] = -22.17 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_ps) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_ps) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_ps) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c
new file mode 100644
index 00000000000..3192a77dd37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_sub_sd (x1, m, x1, x2);
+ x1 = _mm_maskz_sub_sd (m, x1, x2);
+ x1 = _mm_sub_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_sub_round_sd (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_sub_round_sd (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c
new file mode 100644
index 00000000000..b65a8812af6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubsd-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
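+/* vsubsd only writes element 0; element 1 is copied from the first source,
+   which is what this reference function models.  */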
+static void
+compute_vsubsd (double *s1, double *s2, double *r)
+{
+ r[0] = s1[0] - s2[0];
+ r[1] = s1[1];
+}
+
+static void
+avx512f_test (void)
+{
+ union128d s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ double res_ref[2];
+
+ s1.x = _mm_set_pd (-3.0, 111.111);
+ s2.x = _mm_set_pd (222.222, -4.5);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_sub_sd (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_sub_sd (mask, s1.x, s2.x);
+
+ compute_vsubsd (s1.a, s2.a, res_ref);
+
+ MASK_MERGE (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res1, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 1);
+ if (check_fp_union128d (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c
new file mode 100644
index 00000000000..8f857715421
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x1 = _mm_mask_sub_ss (x1, m, x1, x2);
+ x1 = _mm_maskz_sub_ss (m, x1, x2);
+ x1 = _mm_sub_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
+ x1 = _mm_mask_sub_round_ss (x1, m, x1, x2, _MM_FROUND_TO_NEG_INF);
+ x1 = _mm_maskz_sub_round_ss (m, x1, x2, _MM_FROUND_TO_ZERO);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c
new file mode 100644
index 00000000000..1d017b7fa30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vsubss-2.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+#include "avx512f-mask-type.h"
+#include "avx512f-helper.h"
+
+static void
+compute_vsubss (float *s1, float *s2, float *r)
+{
+ r[0] = s1[0] - s2[0];
+ r[1] = s1[1];
+ r[2] = s1[2];
+ r[3] = s1[3];
+}
+
+static void
+avx512f_test (void)
+{
+ union128 s1, s2, res1, res2;
+ __mmask8 mask = MASK_VALUE;
+ float res_ref[4];
+
+ s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
+ s2.x = _mm_set_ps (222.22, -333.33, 444.44, -4.56);
+ res1.a[0] = DEFAULT_VALUE;
+
+ res1.x = _mm_mask_sub_ss (res1.x, mask, s1.x, s2.x);
+ res2.x = _mm_maskz_sub_ss (mask, s1.x, s2.x);
+
+ compute_vsubss (s1.a, s2.a, res_ref);
+
+ MASK_MERGE () (res_ref, mask, 1);
+ if (check_fp_union128 (res1, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, 1);
+ if (check_fp_union128 (res2, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vucomisd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vucomisd-1.c
new file mode 100644
index 00000000000..da0df762002
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vucomisd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vucomisd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm" } } */
+
+#include <immintrin.h>
+
+volatile __m128d x;
+volatile int res;
+
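+/* _MM_FROUND_NO_EXC requests the suppress-all-exceptions form, so the scan
+   above looks for the {sae} modifier.  */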
+void extern
+avx512f_test (void)
+{
+ res = _mm_comi_round_sd (x, x, _CMP_NLE_UQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vucomiss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vucomiss-1.c
new file mode 100644
index 00000000000..d4355de0c30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vucomiss-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler "vucomiss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm" } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile int res;
+
+void extern
+avx512f_test (void)
+{
+ res = _mm_comi_round_ss (x, x, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-1.c
new file mode 100644
index 00000000000..2ce55e4469f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpackhi_pd (y, z);
+ x = _mm512_mask_unpackhi_pd (x, m, y, z);
+ x = _mm512_maskz_unpackhi_pd (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-2.c
new file mode 100644
index 00000000000..85871256237
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhpd-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
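+/* unpackhi_pd interleaves the high double from each 128-bit lane of the two
+   sources; the loop below models that per lane pair.  */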
+void static
+CALC (double *s1, double *s2, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ r[2 * i] = s1[2 * i + 1];
+ r[2 * i + 1] = s2[2 * i + 1];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * 123.2 + 32.6;
+ s2.a[i] = i + 2.5;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_unpackhi_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_unpackhi_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-1.c
new file mode 100644
index 00000000000..9567272c90a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpackhi_ps (y, z);
+ x = _mm512_mask_unpackhi_ps (x, m, y, z);
+ x = _mm512_maskz_unpackhi_ps (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-2.c
new file mode 100644
index 00000000000..9eab399b618
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpckhps-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *s1, float *s2, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE / 4; i++)
+ {
+ r[4 * i] = s1[4 * i + 2];
+ r[4 * i + 1] = s2[4 * i + 2];
+ r[4 * i + 2] = s1[4 * i + 3];
+ r[4 * i + 3] = s2[4 * i + 3];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * 123.2 + 32.6;
+ s2.a[i] = i + 2.5;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_unpackhi_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_unpackhi_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-1.c
new file mode 100644
index 00000000000..5a73037846d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x, y, z;
+volatile __mmask8 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpacklo_pd (y, z);
+ x = _mm512_mask_unpacklo_pd (x, m, y, z);
+ x = _mm512_maskz_unpacklo_pd (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-2.c
new file mode 100644
index 00000000000..c4989287b38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklpd-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (double *s1, double *s2, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ r[2 * i] = s1[2 * i];
+ r[2 * i + 1] = s2[2 * i];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * 123.2 + 32.6;
+ s2.a[i] = i + 2.5;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_unpacklo_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_unpacklo_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_unpacklo_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-1.c
new file mode 100644
index 00000000000..a007a050b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x, y, z;
+volatile __mmask16 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm512_unpacklo_ps (y, z);
+ x = _mm512_mask_unpacklo_ps (x, m, y, z);
+ x = _mm512_maskz_unpacklo_ps (m, y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-2.c
new file mode 100644
index 00000000000..c476254f2b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vunpcklps-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -DAVX512F" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void static
+CALC (float *e, float *s1, float *s2)
+{
+ int i;
+ for (i = 0; i < SIZE / 4; i++)
+ {
+ e[4 * i] = s1[4 * i];
+ e[4 * i + 1] = s2[4 * i];
+ e[4 * i + 2] = s1[4 * i + 1];
+ e[4 * i + 3] = s2[4 * i + 1];
+ }
+}
+
+void static
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, s2, u1, u2, u3;
+ MASK_TYPE mask = MASK_VALUE;
+ float e[SIZE];
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * 123.2 + 32.6;
+ s2.a[i] = i + 2.5;
+ u1.a[i] = DEFAULT_VALUE;
+ u2.a[i] = DEFAULT_VALUE;
+ u3.a[i] = DEFAULT_VALUE;
+ }
+
+ u1.x = INTRINSIC (_unpacklo_ps) (s1.x, s2.x);
+ u2.x = INTRINSIC (_mask_unpacklo_ps) (u2.x, mask, s1.x, s2.x);
+ u3.x = INTRINSIC (_maskz_unpacklo_ps) (mask, s1.x, s2.x);
+
+ CALC (e, s1.a, s2.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (u1, e))
+ abort ();
+
+ MASK_MERGE () (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (u2, e))
+ abort ();
+
+ MASK_ZERO () (e, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (u3, e))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c b/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c
new file mode 100644
index 00000000000..c06ee263174
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f_cond_move.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
+/* { dg-final { scan-assembler "(vpblendmd|vmovdqa32)" } } */
+
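+/* The conditional assignment below should vectorize into a masked blend
+   (vpblendmd) or masked move (vmovdqa32); either form satisfies the scan.  */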
+unsigned int x[128];
+unsigned int y[128];
+
+void
+foo ()
+{
+ int i;
+ for (i = 0; i < 128; i++)
+ x[i] = y[i] > 3 ? 2 : 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-1.c b/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-1.c
new file mode 100644
index 00000000000..34a43537841
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f -ffixed-xmm0 -ffixed-xmm1 -ffixed-xmm2 -ffixed-xmm3 -ffixed-xmm4 -ffixed-xmm5 -ffixed-xmm6 -ffixed-xmm7 -ffixed-xmm8 -ffixed-xmm9 -ffixed-xmm10 -ffixed-xmm11 -ffixed-xmm12 -ffixed-xmm13 -ffixed-xmm14 -ffixed-xmm15" } */
+
+volatile float a,b,c,d;
+
+void foo()
+{
+ __asm__ __volatile__( "vcmpss $1,%1, %2,%3;" : "=x"(c) : "x"(a),"x"(b),"x"(d) );/* { dg-error "inconsistent operand constraints" } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-2.c b/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-2.c
new file mode 100644
index 00000000000..a0a268559a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f_evex_reg_asm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mavx512f -ffixed-xmm0 -ffixed-xmm1 -ffixed-xmm2 -ffixed-xmm3 -ffixed-xmm4 -ffixed-xmm5 -ffixed-xmm6 -ffixed-xmm7 -ffixed-xmm8 -ffixed-xmm9 -ffixed-xmm10 -ffixed-xmm11 -ffixed-xmm12 -ffixed-xmm13 -ffixed-xmm14 -ffixed-xmm15" } */
+/* { dg-final { scan-assembler "vaddss\[ \\t\]+\[^\n\]*%xmm(1\[6-9\]|2\[0-9\]|3\[0-1\])\[^\{\]" } } */
+
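+/* With xmm0-xmm15 fixed, only the EVEX-encodable registers xmm16-xmm31
+   remain, and the "v" constraint (unlike "x") is allowed to use them.  */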
+volatile float a, b, c, d;
+
+void foo()
+{
+ __asm__ __volatile__( "vaddss %1, %2, %3;" : "=v"(c) : "v"(a),"v"(b),"v"(d) );
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c
new file mode 100644
index 00000000000..a688beceb90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask16 m16;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c
new file mode 100644
index 00000000000..9501adf74e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c
new file mode 100644
index 00000000000..6557afd1466
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask16 m16;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c
new file mode 100644
index 00000000000..b0bdfa77b0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c
new file mode 100644
index 00000000000..7ad7544a928
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0dps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0dps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask16 m16;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i32scatter_ps (base, idx, 8, 0);
+ _mm512_mask_prefetch_i32scatter_ps (base, m16, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c
new file mode 100644
index 00000000000..5d143c5f65e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0qps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0qps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i64scatter_ps (base, idx, 8, 0);
+ _mm512_mask_prefetch_i64scatter_ps (base, m8, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c
new file mode 100644
index 00000000000..b97c38db5d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1dps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1dps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask16 m16;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i32scatter_ps (base, idx, 8, 1);
+ _mm512_mask_prefetch_i32scatter_ps (base, m16, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c
new file mode 100644
index 00000000000..6d6be11e451
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1qps\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1qps\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i64scatter_ps (base, idx, 8, 1);
+ _mm512_mask_prefetch_i64scatter_ps (base, m8, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 15f744cf2de..43f28d036cf 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -265,6 +265,61 @@ proc check_effective_target_rtm { } {
} "-mrtm" ]
}
+# Return 1 if avx512f instructions can be compiled.
+proc check_effective_target_avx512f { } {
+ return [check_no_compiler_messages avx512f object {
+ typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+ __v8di
+ mm512_and_epi64 (__v8di __X, __v8di __Y)
+ {
+ __v8di __W;
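+ /* The all-ones mask means the pass-through operand __W is ignored.  */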
+ return __builtin_ia32_pandq512_mask (__X, __Y, __W, -1);
+ }
+ } "-mavx512f" ]
+}
+
+# Return 1 if avx512cd instructions can be compiled.
+proc check_effective_target_avx512cd { } {
+ return [check_no_compiler_messages avx512cd_trans object {
+ typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+ __v8di
+ _mm512_conflict_epi64 (__v8di __W, __v8di __A)
+ {
+ return (__v8di) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+ (__v8di) __W,
+ -1);
+ }
+ } "-Wno-psabi -mavx512cd" ]
+}
+
+# Return 1 if avx512er instructions can be compiled.
+proc check_effective_target_avx512er { } {
+ return [check_no_compiler_messages avx512er_trans object {
+ typedef float __v16sf __attribute__ ((__vector_size__ (64)));
+ __v16sf
+ mm512_exp2a23_ps (__v16sf __X)
+ {
+ __v16sf __W;
+ return __builtin_ia32_exp2ps_mask (__X, __W, -1, 4);
+ }
+ } "-Wno-psabi -mavx512er" ]
+}
+
+# Return 1 if sha instructions can be compiled.
+proc check_effective_target_sha { } {
+ return [check_no_compiler_messages sha object {
+ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+ typedef int __v4si __attribute__ ((__vector_size__ (16)));
+
+ __m128i _mm_sha1msg1_epu32 (__m128i __X, __m128i __Y)
+ {
+ return (__m128i) __builtin_ia32_sha1msg1 ((__v4si)__X,
+ (__v4si)__Y);
+ }
+ } "-O2 -msha" ]
+}
+
# If the linker used understands -M <mapfile>, pass it to clear hardware
# capabilities set by the Sun assembler.
# Try mapfile syntax v2 first which is the only way to clear hwcap_2 flags.
diff --git a/gcc/testsuite/gcc.target/i386/m128-check.h b/gcc/testsuite/gcc.target/i386/m128-check.h
index 4e2deecb172..6336717280f 100644
--- a/gcc/testsuite/gcc.target/i386/m128-check.h
+++ b/gcc/testsuite/gcc.target/i386/m128-check.h
@@ -164,3 +164,28 @@
} bits __attribute__((packed));
};
#endif
+
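+/* Compare each element of U against V, allowing an absolute error of ESP;
+   the return value is the number of mismatching elements.  */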
+#define CHECK_FP_EXP(UNION_TYPE, VALUE_TYPE, ESP, FMT) \
+static int \
+__attribute__((noinline, unused)) \
+check_fp_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v) \
+{ \
+ int i; \
+ int err = 0; \
+ \
+ for (i = 0; i < ARRAY_SIZE (u.a); i++) \
+ if (u.a[i] > (v[i] + (ESP)) || u.a[i] < (v[i] - (ESP))) \
+ { \
+ err++; \
+ PRINTF ("%i: " FMT " != " FMT "\n", \
+ i, v[i], u.a[i]); \
+ } \
+ return err; \
+}
+
+CHECK_FP_EXP (union128, float, ESP_FLOAT, "%f")
+#ifdef __SSE2__
+CHECK_FP_EXP (union128d, double, ESP_DOUBLE, "%f")
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/m512-check.h b/gcc/testsuite/gcc.target/i386/m512-check.h
new file mode 100644
index 00000000000..3209039d6d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/m512-check.h
@@ -0,0 +1,75 @@
+#include <immintrin.h>
+#include "m256-check.h"
+
+typedef union
+{
+ __m512i x;
+ char a[64];
+} union512i_b;
+
+typedef union
+{
+ __m512i x;
+ short a[32];
+} union512i_w;
+
+typedef union
+{
+ __m512i x;
+ int a[16];
+} union512i_d;
+
+typedef union
+{
+ __m512i x;
+ long long a[8];
+} union512i_q;
+
+typedef union
+{
+ __m512 x;
+ float a[16];
+} union512;
+
+typedef union
+{
+ __m512d x;
+ double a[8];
+} union512d;
+
+CHECK_EXP (union512i_b, char, "%d")
+CHECK_EXP (union512i_w, short, "%d")
+CHECK_EXP (union512i_d, int, "0x%x")
+CHECK_EXP (union512i_q, long long, "0x%llx")
+CHECK_EXP (union512, float, "%f")
+CHECK_EXP (union512d, double, "%f")
+
+CHECK_FP_EXP (union512, float, ESP_FLOAT, "%f")
+CHECK_FP_EXP (union512d, double, ESP_DOUBLE, "%f")
+
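+/* Like CHECK_FP_EXP, but with a relative tolerance; the division by v[i]
+   assumes the reference values are nonzero.  */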
+#define CHECK_ROUGH_EXP(UNION_TYPE, VALUE_TYPE, FMT) \
+static int \
+__attribute__((noinline, unused)) \
+check_rough_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v, \
+ VALUE_TYPE eps) \
+{ \
+ int i; \
+ int err = 0; \
+ \
+ for (i = 0; i < ARRAY_SIZE (u.a); i++) \
+ { \
+ VALUE_TYPE rel_err = (u.a[i] - v[i]) / v[i]; \
+ if (((rel_err < 0) ? -rel_err : rel_err) > eps) \
+ { \
+ err++; \
+ PRINTF ("%i: " FMT " != " FMT "\n", \
+ i, v[i], u.a[i]); \
+ } \
+ } \
+ return err; \
+}
+
+CHECK_ROUGH_EXP (union512, float, "%f")
+CHECK_ROUGH_EXP (union512d, double, "%f")
diff --git a/gcc/testsuite/gcc.target/i386/sha-check.h b/gcc/testsuite/gcc.target/i386/sha-check.h
new file mode 100644
index 00000000000..e0a18076e15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha-check.h
@@ -0,0 +1,38 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "cpuid.h"
+
+static void sha_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+ sha_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid_max (0, NULL) >= 7)
+ {
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ /* Run SHA test only if host has SHA support. */
+ if (ebx & bit_SHA)
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+ }
+
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg1-1.c b/gcc/testsuite/gcc.target/i386/sha1msg1-1.c
new file mode 100644
index 00000000000..808f3617f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg1-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1msg1\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha1msg1_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg1-2.c b/gcc/testsuite/gcc.target/i386/sha1msg1-2.c
new file mode 100644
index 00000000000..35a60571f86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg1-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <immintrin.h>
+
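+/* sha1msg1 xors message words two apart: with W0 in the most significant
+   dword of SRC1, DEST holds (W0^W2, W1^W3, W2^W4, W3^W5), high to low.  */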
+static void
+compute_sha1msg1 (int *s1, int *s2, int *r)
+{
+ int w0, w1, w2, w3, w4, w5;
+
+ w0 = s1[3];
+ w1 = s1[2];
+ w2 = s1[1];
+ w3 = s1[0];
+ w4 = s2[3];
+ w5 = s2[2];
+
+ r[0] = w5 ^ w3;
+ r[1] = w4 ^ w2;
+ r[2] = w3 ^ w1;
+ r[3] = w2 ^ w0;
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 222, 333, 444);
+ s2.x = _mm_set_epi32 (555, 666, 0, 0);
+
+ res.x = _mm_sha1msg1_epu32 (s1.x, s2.x);
+
+ compute_sha1msg1 (s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg2-1.c b/gcc/testsuite/gcc.target/i386/sha1msg2-1.c
new file mode 100644
index 00000000000..9c0ffc13f6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1msg2\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha1msg2_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1msg2-2.c b/gcc/testsuite/gcc.target/i386/sha1msg2-2.c
new file mode 100644
index 00000000000..21eaf8dd9fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1msg2-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
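+/* sha1msg2 finishes the schedule update: W16..W19 are SRC1's dwords xored
+   with W13..W15 and rotated left by 1, with W19 reusing the fresh W16.  */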
+static void
+compute_sha1msg2 (int *s1, int *s2, int *r)
+{
+ int w13, w14, w15, w16, w17, w18, w19;
+
+ w13 = s2[2];
+ w14 = s2[1];
+ w15 = s2[0];
+ w16 = __rold (s1[3] ^ w13, 1);
+ w17 = __rold (s1[2] ^ w14, 1);
+ w18 = __rold (s1[1] ^ w15, 1);
+ w19 = __rold (s1[0] ^ w16, 1);
+
+ r[0] = w19;
+ r[1] = w18;
+ r[2] = w17;
+ r[3] = w16;
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 222, 333, 444);
+ s2.x = _mm_set_epi32 (555, 666, 777, 0);
+
+ res.x = _mm_sha1msg2_epu32 (s1.x, s2.x);
+
+ compute_sha1msg2 (s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1nexte-1.c b/gcc/testsuite/gcc.target/i386/sha1nexte-1.c
new file mode 100644
index 00000000000..40edc780ffe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1nexte-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1nexte\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha1nexte_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1nexte-2.c b/gcc/testsuite/gcc.target/i386/sha1nexte-2.c
new file mode 100644
index 00000000000..f0dc6cbc6a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1nexte-2.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
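+/* sha1nexte computes the next E value, ROL30 of SRC1's top dword, and adds
+   it into SRC2's top dword; the lower dwords pass through unchanged.  */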
+static void
+compute_sha1nexte (int *s1, int *s2, int *r)
+{
+ int tmp = __rold (s1[3], 30);
+
+ r[0] = s2[0];
+ r[1] = s2[1];
+ r[2] = s2[2];
+ r[3] = s2[3] + tmp;
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 0, 0, 0);
+ s2.x = _mm_set_epi32 (222, 333, 444, 555);
+
+ res.x = _mm_sha1nexte_epu32 (s1.x, s2.x);
+
+ compute_sha1nexte (s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c b/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c
new file mode 100644
index 00000000000..c9da57df000
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1rnds4-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha1rnds4\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha1rnds4_epu32 (x, x, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c b/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c
new file mode 100644
index 00000000000..91210b1f0a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha1rnds4-2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
+static int
+f0 (int b, int c, int d)
+{
+ return (b & c) ^ (~b & d);
+}
+
+static int
+f1 (int b, int c, int d)
+{
+ return b ^ c ^ d;
+}
+
+static int
+f2 (int b, int c, int d)
+{
+ return (b & c) ^ (b & d) ^ (c & d);
+}
+
+int (*f_arr[4])(int, int, int) = { f0, f1, f2, f1 };
+const int k_arr[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 };
+
+
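+/* The immediate picks one of the four SHA-1 round groups; f_arr and k_arr
+   mirror the round function and constant for each group.  E starts at 0
+   because sha1nexte is expected to have folded E into the input already.  */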
+static void
+compute_sha1rnds4 (int *src1, int *src2, int imm, int *res)
+{
+ int k = k_arr[imm];
+ int (*f)(int, int, int) = f_arr[imm];
+
+ int w[4] = { src2[3], src2[2], src2[1], src2[0] };
+ int a[5], b[5], c[5], d[5], e[5];
+
+ a[0] = src1[3];
+ b[0] = src1[2];
+ c[0] = src1[1];
+ d[0] = src1[0];
+ e[0] = 0;
+
+ int i;
+ for (i = 0; i <= 3; i++)
+ {
+ a[i+1] = f(b[i], c[i], d[i]) + __rold (a[i], 5) + w[i] + e[i] + k;
+ b[i+1] = a[i];
+ c[i+1] = __rold (b[i], 30);
+ d[i+1] = c[i];
+ e[i+1] = d[i];
+ }
+
+ res[0] = d[4];
+ res[1] = c[4];
+ res[2] = b[4];
+ res[3] = a[4];
+}
+
+
+static void
+sha_test (void)
+{
+ int imm;
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 222, 333, 444);
+ s2.x = _mm_set_epi32 (555, 666, 777, 888);
+
+ res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 0);
+ compute_sha1rnds4 (s1.a, s2.a, 0, res_ref);
+ if (check_union128i_d (res, res_ref))
+ abort ();
+
+ res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 1);
+ compute_sha1rnds4 (s1.a, s2.a, 1, res_ref);
+ if (check_union128i_d (res, res_ref))
+ abort ();
+
+ res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 2);
+ compute_sha1rnds4 (s1.a, s2.a, 2, res_ref);
+ if (check_union128i_d (res, res_ref))
+ abort ();
+
+ res.x = _mm_sha1rnds4_epu32 (s1.x, s2.x, 3);
+ compute_sha1rnds4 (s1.a, s2.a, 3, res_ref);
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg1-1.c b/gcc/testsuite/gcc.target/i386/sha256msg1-1.c
new file mode 100644
index 00000000000..020874e4a4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg1-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256msg1\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha256msg1_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg1-2.c b/gcc/testsuite/gcc.target/i386/sha256msg1-2.c
new file mode 100644
index 00000000000..2b70920b029
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg1-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
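+/* s0 is the SHA-256 sigma0 function; sha256msg1 adds sigma0 of the next
+   message word to each of W0..W3.  */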
+static int
+s0 (int w)
+{
+ return __rord (w, 7) ^ __rord (w, 18) ^ (w >> 3);
+}
+
+static void
+compute_sha256msg1 (int *src1, int *src2, int *res)
+{
+ int w0, w1, w2, w3, w4;
+
+ w0 = src1[0];
+ w1 = src1[1];
+ w2 = src1[2];
+ w3 = src1[3];
+ w4 = src2[0];
+
+ res[0] = w0 + s0 (w1);
+ res[1] = w1 + s0 (w2);
+ res[2] = w2 + s0 (w3);
+ res[3] = w3 + s0 (w4);
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 222, 333, 444);
+ s2.x = _mm_set_epi32 (0, 0, 0, 555);
+
+ res.x = _mm_sha256msg1_epu32 (s1.x, s2.x);
+
+ compute_sha256msg1 (s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg2-1.c b/gcc/testsuite/gcc.target/i386/sha256msg2-1.c
new file mode 100644
index 00000000000..88a9a03e4e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256msg2\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha256msg2_epu32 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256msg2-2.c b/gcc/testsuite/gcc.target/i386/sha256msg2-2.c
new file mode 100644
index 00000000000..ffb0c2582bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256msg2-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
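+/* s1 is the SHA-256 sigma1 function; the W16..W19 updates chain, so W18 and
+   W19 consume the words computed just above them.  */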
+static int
+s1 (int w)
+{
+ return __rord (w, 17) ^ __rord (w, 19) ^ (w >> 10);
+}
+
+static void
+compute_sha256msg2 (int *src1, int *src2, int *res)
+{
+ int w14, w15, w16, w17, w18, w19;
+
+ w14 = src2[2];
+ w15 = src2[3];
+ w16 = src1[0] + s1 (w14);
+ w17 = src1[1] + s1 (w15);
+ w18 = src1[2] + s1 (w16);
+ w19 = src1[3] + s1 (w17);
+
+ res[0] = w16;
+ res[1] = w17;
+ res[2] = w18;
+ res[3] = w19;
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s1, s2, res;
+ int res_ref[4];
+
+ s1.x = _mm_set_epi32 (111, 222, 333, 444);
+ s2.x = _mm_set_epi32 (555, 666, 0, 0);
+
+ res.x = _mm_sha256msg2_epu32 (s1.x, s2.x);
+
+ compute_sha256msg2 (s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c b/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c
new file mode 100644
index 00000000000..8bdf6642078
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256rnds2-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-final { scan-assembler "sha256rnds2\[ \\t\]+\[^\n\]*%xmm0\[^\n\]*%xmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+
+void extern
+sha_test (void)
+{
+ x = _mm_sha256rnds2_epu32 (x, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c b/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c
new file mode 100644
index 00000000000..4e586749def
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sha256rnds2-2.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msha" } */
+/* { dg-require-effective-target sha } */
+
+#include "sha-check.h"
+#include "m128-check.h"
+#include <x86intrin.h>
+#include <immintrin.h>
+
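+/* ch, maj, s0 and s1 are the SHA-256 round functions; the loop performs two
+   rounds, with wk holding the two pre-added W+K values taken from the low
+   qword of the implicit xmm0 operand.  */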
+static int
+ch (int e, int f, int g)
+{
+ return (e & f) ^ (~e & g);
+}
+
+static int
+maj (int a, int b, int c)
+{
+ return (a & b) ^ (a & c) ^ (b & c);
+}
+
+static int
+s0 (int a)
+{
+ return __rord (a, 2) ^ __rord (a, 13) ^ __rord (a, 22);
+}
+
+static int
+s1 (int e)
+{
+ return __rord (e, 6) ^ __rord (e, 11) ^ __rord (e, 25);
+}
+
+static void
+compute_sha256rnds2 (int *src0, int *src1, int *src2, int *res)
+{
+ int wk[2] = { src0[0], src0[1] };
+ int a[3], b[3], c[3], d[3], e[3], f[3], g[3], h[3];
+
+ a[0] = src2[3];
+ b[0] = src2[2];
+ c[0] = src1[3];
+ d[0] = src1[2];
+ e[0] = src2[1];
+ f[0] = src2[0];
+ g[0] = src1[1];
+ h[0] = src1[0];
+
+ int i;
+ for (i = 0; i <= 1; i++)
+ {
+ a[i+1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i]
+ + maj (a[i], b[i], c[i]) + s0 (a[i]);
+ b[i+1] = a[i];
+ c[i+1] = b[i];
+ d[i+1] = c[i];
+ e[i+1] = ch (e[i], f[i], g[i]) + s1 (e[i]) + wk[i] + h[i] + d[i];
+ f[i+1] = e[i];
+ g[i+1] = f[i];
+ h[i+1] = g[i];
+ }
+
+ res[0] = f[2];
+ res[1] = e[2];
+ res[2] = b[2];
+ res[3] = a[2];
+}
+
+static void
+sha_test (void)
+{
+ union128i_d s0, s1, s2, res;
+ int res_ref[4];
+
+ s0.x = _mm_set_epi32 (0, 0, 111, 222);
+ s1.x = _mm_set_epi32 (333, 444, 555, 666);
+ s2.x = _mm_set_epi32 (777, 888, 999, 123);
+
+ res.x = _mm_sha256rnds2_epu32 (s1.x, s2.x, s0.x);
+
+ compute_sha256rnds2 (s0.a, s1.a, s2.a, res_ref);
+
+ if (check_union128i_d (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index c1c5745ef0b..aa362428879 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 1d777d12e4f..569eacf0450 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f" } */
#include <mm_malloc.h>
@@ -55,6 +55,20 @@
#define __builtin_ia32_vcvtps2ph(A, I) __builtin_ia32_vcvtps2ph(A, 1)
#define __builtin_ia32_vcvtps2ph256(A, I) __builtin_ia32_vcvtps2ph256(A, 1)
+/* avx512pfintrin.h */
+#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps (A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps (A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps (A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps (A, B, C, 1, 1)
+
+/* avx512erintrin.h */
+#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 1)
+#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 1)
+#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 1)
+#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 1)
+#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1)
+#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1)
+
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
@@ -182,3 +196,189 @@
/* rtmintrin.h */
#define __builtin_ia32_xabort(N) __builtin_ia32_xabort (1)
+
+/* avx512fintrin.h */
+#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addsd_mask(A, B, C, D, E) __builtin_ia32_addsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_addss_mask(A, B, C, D, E) __builtin_ia32_addss_mask(A, B, C, D, 1)
+#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpsd_mask(A, B, F, D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) __builtin_ia32_cvtpd2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1)
+#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
+#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
+#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
+#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5)
+#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 1)
+#define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 1)
+#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
+#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divsd_mask(A, B, C, D, E) __builtin_ia32_divsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_divss_mask(A, B, C, D, E) __builtin_ia32_divss_mask(A, B, C, D, 1)
+#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpsd128_mask(A, B, C, D, E) __builtin_ia32_getexpsd128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getexpss128_mask(A, B, C, D, E) __builtin_ia32_getexpss128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantsd_mask(A, B, I, D, E, F) __builtin_ia32_getmantsd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_getmantss_mask(A, B, I, D, E, F) __builtin_ia32_getmantss_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxsd_mask(A, B, C, D, E) __builtin_ia32_maxsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxss_mask(A, B, C, D, E) __builtin_ia32_maxss_mask(A, B, C, D, 5)
+#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minsd_mask(A, B, C, D, E) __builtin_ia32_minsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_minss_mask(A, B, C, D, E) __builtin_ia32_minss_mask(A, B, C, D, 5)
+#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulsd_mask(A, B, C, D, E) __builtin_ia32_mulsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulss_mask(A, B, C, D, E) __builtin_ia32_mulss_mask(A, B, C, D, 1)
+#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
+#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
+#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
+#define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D)
+#define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D)
+#define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefsd_mask(A, B, C, D, E) __builtin_ia32_scalefsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefss_mask(A, B, C, D, E) __builtin_ia32_scalefss_mask(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
+#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 1)
+#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subsd_mask(A, B, C, D, E) __builtin_ia32_subsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_subss_mask(A, B, C, D, E) __builtin_ia32_subss_mask(A, B, C, D, 1)
+#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
+#define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 5)
+#define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 5)
+#define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 1)
+#define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 1)
+#define __builtin_ia32_vcvtsd2usi32(A, B) __builtin_ia32_vcvtsd2usi32(A, 1)
+#define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 1)
+#define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 1)
+#define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 1)
+#define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 1)
+#define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 1)
+#define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 5)
+#define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 5)
+#define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 5)
+#define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 5)
+#define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 5)
+#define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 5)
+#define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 5)
+#define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 5)
+#define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
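
These redefinitions close out the avx-1.c pattern applied to every immediate-taking AVX-512 builtin: the header wrappers are normally extern inline functions, and avx-1.c compiles them as ordinary functions, so each wrapper's immediate argument becomes a plain parameter that the underlying builtin would reject. Redefining the builtin substitutes a known-valid literal instead. A paraphrased sketch of the mechanism, using the pslldi512 entry from the list above; the exact wrapper body in avx512fintrin.h may differ in detail:

/* Approximate shape of the inline wrapper in avx512fintrin.h
   (paraphrased; the pass-through source operand may differ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
      (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

/* When avx-1.c defines away "extern" and "__inline", __B is an
   ordinary parameter, not a compile-time constant, so the builtin
   would reject it in the immediate slot.  The redefinition

     #define __builtin_ia32_pslldi512_mask(A, E, C, D) \
       __builtin_ia32_pslldi512_mask(A, 1, C, D)

   discards the non-constant argument and passes the literal 1
   instead, letting the whole header compile unchanged.  */
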
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 331be0e1987..4d4d5e7efc5 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512pf -mavx512cd -msha" } */
#include <mm_malloc.h>
@@ -31,6 +31,10 @@
type _CONCAT(_,func) (op1_type A, int const I, int const L) \
{ return func (A, imm1, imm2); }
+#define test_1y(func, type, op1_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, int const I, int const L, int const R)\
+ { return func (A, imm1, imm2, imm3); }
+
#define test_2(func, type, op1_type, op2_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \
{ return func (A, B, imm); }
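
The new wrapper-generator macros extend sse-14.c's existing naming convention: the digit counts the intrinsic's non-immediate operands, an x suffix means two trailing immediates, the new y suffix means three, and v (or vx) marks a void-returning intrinsic whose wrapper drops the return type and return statement. Expanding test_1y by hand for one of the entries added further down shows exactly what gets compiled; this is pure macro mechanics, nothing invented (whitespace reflowed):

/* test_1y (_mm512_getmant_round_pd, __m512d, __m512d, 1, 1, 5)
   produces: */
__m512d
__mm512_getmant_round_pd (__m512d A, int const I, int const L, int const R)
{ return _mm512_getmant_round_pd (A, 1, 1, 5); }

The I, L and R parameters are deliberately unused; they mirror the intrinsic's full signature while the call site supplies literal immediates that survive the constant-argument checks.
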
@@ -39,16 +43,60 @@
type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
{ return func (A, B, imm1, imm2); }
+#define test_2y(func, type, op1_type, op2_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L,\
+ int const R) \
+ { return func (A, B, imm1, imm2, imm3); }
+
+#define test_2vx(func, op1_type, op2_type, imm1, imm2) \
+ _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
+ { func (A, B, imm1, imm2); }
+
#define test_3(func, type, op1_type, op2_type, op3_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, int const I) \
{ return func (A, B, C, imm); }
+#define test_3x(func, type, op1_type, op2_type, op3_type, imm1, imm2) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L) \
+ { return func (A, B, C, imm1, imm2); }
+
+#define test_3y(func, type, op1_type, op2_type, op3_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L, int const R) \
+ { return func (A, B, C, imm1, imm2, imm3); }
+
+#define test_3v(func, op1_type, op2_type, op3_type, imm) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I) \
+ { func (A, B, C, imm); }
+
+#define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L) \
+ { func (A, B, C, imm1, imm2); }
+
#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, op4_type D, int const I) \
{ return func (A, B, C, D, imm); }
+#define test_4x(func, type, op1_type, op2_type, op3_type, op4_type, imm1, imm2) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, op4_type D, int const I, int const L) \
+ { return func (A, B, C, D, imm1, imm2); }
+
+#define test_4y(func, type, op1_type, op2_type, op3_type, op4_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, op3_type C, \
+ op4_type D, int const I, int const L, int const R) \
+ { return func (A, B, C, D, imm1, imm2, imm3); }
+
+#define test_4v(func, op1_type, op2_type, op3_type, op4_type, imm) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, op4_type D, int const I) \
+ { func (A, B, C, D, imm); }
+
/* Following intrinsics require immediate arguments. They
are defined as macros for non-optimized compilations. */
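
Because sse-14.c compiles at -O0 (see the dg-options above), every intrinsic with an immediate operand is a preprocessor macro rather than an inline function, and the immediate must be an integer literal at the point of use. Each test_N invocation below therefore generates one wrapper function per intrinsic with the immediates hard-coded. Tracing one entry through the test_2 definition shown earlier (again pure macro expansion, whitespace reflowed):

/* test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1) yields: */
__m512d
__mm512_add_round_pd (__m512d A, __m512d B, int const I)
{ return _mm512_add_round_pd (A, B, 1); }

The literal immediates follow a simple pattern in the lists below: 1 where any constant is acceptable (plain immediates and embedded-rounding operands) and 5 where the operand is an SAE-only rounding selector, as with the truncating converts (_mm512_cvtt_*) and the getexp/min/max entries.
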
@@ -100,6 +148,470 @@ test_1 (_cvtss_sh, unsigned short, float, 1)
test_1 (_mm_cvtps_ph, __m128i, __m128, 1)
test_1 (_mm256_cvtps_ph, __m128i, __m256, 1)
test_0 (_xabort, void, 1)
+test_1 (_mm512_cvt_roundepi32_ps, __m512, __m512i, 1)
+test_1 (_mm512_cvt_roundepu32_ps, __m512, __m512i, 1)
+test_1 (_mm512_cvt_roundpd_epi32, __m256i, __m512d, 1)
+test_1 (_mm512_cvt_roundpd_epu32, __m256i, __m512d, 1)
+test_1 (_mm512_cvt_roundpd_ps, __m256, __m512d, 1)
+test_1 (_mm512_cvt_roundph_ps, __m512, __m256i, 5)
+test_1 (_mm512_cvt_roundps_epi32, __m512i, __m512, 1)
+test_1 (_mm512_cvt_roundps_epu32, __m512i, __m512, 1)
+test_1 (_mm512_cvt_roundps_pd, __m512d, __m256, 5)
+test_1 (_mm512_cvtps_ph, __m256i, __m512, 1)
+test_1 (_mm512_cvtt_roundpd_epi32, __m256i, __m512d, 5)
+test_1 (_mm512_cvtt_roundpd_epu32, __m256i, __m512d, 5)
+test_1 (_mm512_cvtt_roundps_epi32, __m512i, __m512, 5)
+test_1 (_mm512_cvtt_roundps_epu32, __m512i, __m512, 5)
+test_1 (_mm512_extractf32x4_ps, __m128, __m512, 1)
+test_1 (_mm512_extractf64x4_pd, __m256d, __m512d, 1)
+test_1 (_mm512_extracti32x4_epi32, __m128i, __m512i, 1)
+test_1 (_mm512_extracti64x4_epi64, __m256i, __m512i, 1)
+test_1 (_mm512_getexp_round_pd, __m512d, __m512d, 5)
+test_1 (_mm512_getexp_round_ps, __m512, __m512, 5)
+test_1y (_mm512_getmant_round_pd, __m512d, __m512d, 1, 1, 5)
+test_1y (_mm512_getmant_round_ps, __m512, __m512, 1, 1, 5)
+test_1 (_mm512_permute_pd, __m512d, __m512d, 1)
+test_1 (_mm512_permute_ps, __m512, __m512, 1)
+test_1 (_mm512_permutex_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_permutex_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rol_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_rol_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_ror_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_ror_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_shuffle_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_slli_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_slli_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_sqrt_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_sqrt_round_ps, __m512, __m512, 1)
+test_1 (_mm512_srai_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_srai_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_srli_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_srli_epi64, __m512i, __m512i, 1)
+test_1 (_mm_cvt_roundsd_i32, int, __m128d, 1)
+test_1 (_mm_cvt_roundsd_u32, unsigned, __m128d, 1)
+test_1 (_mm_cvt_roundss_i32, int, __m128, 1)
+test_1 (_mm_cvt_roundss_u32, unsigned, __m128, 1)
+test_1 (_mm_cvtt_roundsd_i32, int, __m128d, 5)
+test_1 (_mm_cvtt_roundsd_u32, unsigned, __m128d, 5)
+test_1 (_mm_cvtt_roundss_i32, int, __m128, 5)
+test_1 (_mm_cvtt_roundss_u32, unsigned, __m128, 5)
+test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1)
+test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1)
+test_1x (_mm512_roundscale_round_pd, __m512d, __m512d, 1, 5)
+test_1x (_mm512_roundscale_round_ps, __m512, __m512, 1, 5)
+test_1x (_mm_cvt_roundi32_ss, __m128, __m128, 1, 1)
+test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_alignr_epi64, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epi32_mask, __mmask16, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epi64_mask, __mmask8, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epu32_mask, __mmask16, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epu64_mask, __mmask8, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_pd_mask, __mmask8, __m512d, __m512d, 1)
+test_2 (_mm512_cmp_ps_mask, __mmask16, __m512, __m512, 1)
+test_2 (_mm512_div_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_div_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_i32gather_epi32, __m512i, __m512i, void const *, 1)
+test_2 (_mm512_i32gather_epi64, __m512i, __m256i, void const *, 1)
+test_2 (_mm512_i32gather_pd, __m512d, __m256i, void const *, 1)
+test_2 (_mm512_i32gather_ps, __m512, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_epi32, __m256i, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_epi64, __m512i, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_pd, __m512d, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_ps, __m256, __m512i, void const *, 1)
+test_2 (_mm512_insertf32x4, __m512, __m512, __m128, 1)
+test_2 (_mm512_insertf64x4, __m512d, __m512d, __m256d, 1)
+test_2 (_mm512_inserti32x4, __m512i, __m512i, __m128i, 1)
+test_2 (_mm512_inserti64x4, __m512i, __m512i, __m256i, 1)
+test_2 (_mm512_maskz_cvt_roundepi32_ps, __m512, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_cvt_roundepu32_ps, __m512, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_cvt_roundpd_epi32, __m256i, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundpd_epu32, __m256i, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundpd_ps, __m256, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundph_ps, __m512, __mmask16, __m256i, 5)
+test_2 (_mm512_maskz_cvt_roundps_epi32, __m512i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvt_roundps_epu32, __m512i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvt_roundps_pd, __m512d, __mmask8, __m256, 5)
+test_2 (_mm512_maskz_cvtps_ph, __m256i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvtt_roundpd_epi32, __m256i, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_cvtt_roundpd_epu32, __m256i, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_cvtt_roundps_epi32, __m512i, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_cvtt_roundps_epu32, __m512i, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_extractf32x4_ps, __m128, __mmask8, __m512, 1)
+test_2 (_mm512_maskz_extractf64x4_pd, __m256d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_extracti32x4_epi32, __m128i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_extracti64x4_epi64, __m256i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_getexp_round_pd, __m512d, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_getexp_round_ps, __m512, __mmask16, __m512, 5)
+test_2y (_mm512_maskz_getmant_round_pd, __m512d, __mmask8, __m512d, 1, 1, 5)
+test_2y (_mm512_maskz_getmant_round_ps, __m512, __mmask16, __m512, 1, 1, 5)
+test_2 (_mm512_maskz_permute_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_permute_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_permutex_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_permutex_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rol_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_rol_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_ror_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_ror_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_shuffle_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_slli_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_slli_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_sqrt_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_sqrt_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_srai_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_srai_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_srli_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_srli_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_max_round_pd, __m512d, __m512d, __m512d, 5)
+test_2 (_mm512_max_round_ps, __m512, __m512, __m512, 5)
+test_2 (_mm512_min_round_pd, __m512d, __m512d, __m512d, 5)
+test_2 (_mm512_min_round_ps, __m512, __m512, __m512, 5)
+test_2 (_mm512_mul_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_mul_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_scalef_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_scalef_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_shuffle_f32x4, __m512, __m512, __m512, 1)
+test_2 (_mm512_shuffle_f64x2, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_shuffle_i32x4, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_shuffle_i64x2, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm_add_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_add_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1)
+test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1)
+#ifdef __x86_64__
+test_2 (_mm_cvt_roundi64_sd, __m128d, __m128d, long long, 1)
+test_2 (_mm_cvt_roundi64_ss, __m128, __m128, long long, 1)
+#endif
+test_2 (_mm_cvt_roundsd_ss, __m128, __m128, __m128d, 1)
+test_2 (_mm_cvt_roundss_sd, __m128d, __m128d, __m128, 5)
+test_2 (_mm_cvt_roundu32_ss, __m128, __m128, unsigned, 1)
+#ifdef __x86_64__
+test_2 (_mm_cvt_roundu64_sd, __m128d, __m128d, unsigned long long, 1)
+test_2 (_mm_cvt_roundu64_ss, __m128, __m128, unsigned long long, 1)
+#endif
+test_2 (_mm_div_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_div_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_getexp_round_sd, __m128d, __m128d, __m128d, 5)
+test_2 (_mm_getexp_round_ss, __m128, __m128, __m128, 5)
+test_2y (_mm_getmant_round_sd, __m128d, __m128d, __m128d, 1, 1, 5)
+test_2y (_mm_getmant_round_ss, __m128, __m128, __m128, 1, 1, 5)
+test_2 (_mm_mul_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_mul_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_scalef_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_scalef_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sqrt_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sqrt_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sub_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sub_round_ss, __m128, __m128, __m128, 1)
+test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5)
+test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5)
+test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5)
+test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5)
+test_2x (_mm_cmp_round_sd_mask, __mmask8, __m128d, __m128d, 1, 5)
+test_2x (_mm_cmp_round_ss_mask, __mmask8, __m128, __m128, 1, 5)
+test_2x (_mm_comi_round_sd, int, __m128d, __m128d, 1, 5)
+test_2x (_mm_comi_round_ss, int, __m128, __m128, 1, 5)
+test_2x (_mm_roundscale_round_sd, __m128d, __m128d, __m128d, 1, 5)
+test_2x (_mm_roundscale_round_ss, __m128, __m128, __m128, 1, 5)
+test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmaddsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmsubadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmsubadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fnmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fnmadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fnmsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_mask_cmp_epi32_mask, __mmask16, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epi64_mask, __mmask8, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epu32_mask, __mmask16, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epu64_mask, __mmask8, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_pd_mask, __mmask8, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_mask_cmp_ps_mask, __mmask16, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_mask_cvt_roundepi32_ps, __m512, __m512, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_cvt_roundepu32_ps, __m512, __m512, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_cvt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundpd_ps, __m256, __m256, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundph_ps, __m512, __m512, __mmask16, __m256i, 5)
+test_3 (_mm512_mask_cvt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvt_roundps_pd, __m512d, __m512d, __mmask8, __m256, 5)
+test_3 (_mm512_mask_cvtps_ph, __m256i, __m256i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvtt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_cvtt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_cvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 5)
+test_3 (_mm512_mask_cvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 5)
+test_3 (_mm512_mask_extractf32x4_ps, __m128, __m128, __mmask8, __m512, 1)
+test_3 (_mm512_mask_extractf64x4_pd, __m256d, __m256d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_extracti32x4_epi32, __m128i, __m128i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_extracti64x4_epi64, __m256i, __m256i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_getexp_round_pd, __m512d, __m512d, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_getexp_round_ps, __m512, __m512, __mmask16, __m512, 5)
+test_3y (_mm512_mask_getmant_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 1, 5)
+test_3y (_mm512_mask_getmant_round_ps, __m512, __m512, __mmask16, __m512, 1, 1, 5)
+test_3 (_mm512_mask_permute_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_permute_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_permutex_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_permutex_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rol_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_rol_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_ror_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_ror_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_shuffle_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_slli_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_slli_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_sqrt_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_sqrt_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_srai_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_srai_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_srli_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_srli_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_maskz_add_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_add_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_alignr_epi32, __m512i, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_alignr_epi64, __m512i, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_div_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_div_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_insertf32x4, __m512, __mmask16, __m512, __m128, 1)
+test_3 (_mm512_maskz_insertf64x4, __m512d, __mmask8, __m512d, __m256d, 1)
+test_3 (_mm512_maskz_inserti32x4, __m512i, __mmask16, __m512i, __m128i, 1)
+test_3 (_mm512_maskz_inserti64x4, __m512i, __mmask8, __m512i, __m256i, 1)
+test_3 (_mm512_maskz_max_round_pd, __m512d, __mmask8, __m512d, __m512d, 5)
+test_3 (_mm512_maskz_max_round_ps, __m512, __mmask16, __m512, __m512, 5)
+test_3 (_mm512_maskz_min_round_pd, __m512d, __mmask8, __m512d, __m512d, 5)
+test_3 (_mm512_maskz_min_round_ps, __m512, __mmask16, __m512, __m512, 5)
+test_3 (_mm512_maskz_mul_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_shuffle_i64x2, __m512i, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_shuffle_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_shuffle_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1)
+test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1)
+test_3 (_mm_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmsub_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmsub_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_mask_cmp_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_add_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_add_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_cvt_roundsd_ss, __m128, __mmask8, __m128, __m128d, 1)
+test_3 (_mm_maskz_cvt_roundss_sd, __m128d, __mmask8, __m128d, __m128, 5)
+test_3 (_mm_maskz_div_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_div_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_getexp_round_sd, __m128d, __mmask8, __m128d, __m128d, 5)
+test_3 (_mm_maskz_getexp_round_ss, __m128, __mmask8, __m128, __m128, 5)
+test_3y (_mm_maskz_getmant_round_sd, __m128d, __mmask8, __m128d, __m128d, 1, 1, 5)
+test_3y (_mm_maskz_getmant_round_ss, __m128, __mmask8, __m128, __m128, 1, 1, 5)
+test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_sqrt_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_sqrt_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_sub_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_sub_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1)
+test_3v (_mm512_i32scatter_epi64, void *, __m256i, __m512i, 1)
+test_3v (_mm512_i32scatter_pd, void *, __m256i, __m512d, 1)
+test_3v (_mm512_i32scatter_ps, void *, __m512i, __m512, 1)
+test_3v (_mm512_i64scatter_epi32, void *, __m512i, __m256i, 1)
+test_3v (_mm512_i64scatter_epi64, void *, __m512i, __m512i, 1)
+test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1)
+test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1)
+test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5)
+test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5)
+test_3x (_mm_maskz_roundscale_round_sd, __m128d, __mmask8, __m128d, __m128d, 1, 5)
+test_3x (_mm_maskz_roundscale_round_ss, __m128, __mmask8, __m128, __m128, 1, 5)
+test_3x (_mm_fixupimm_round_sd, __m128d, __m128d, __m128d, __m128i, 1, 5)
+test_3x (_mm_fixupimm_round_ss, __m128, __m128, __m128, __m128i, 1, 5)
+test_3x (_mm_mask_cmp_round_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1, 5)
+test_3x (_mm_mask_cmp_round_ss_mask, __mmask8, __mmask8, __m128, __m128, 1, 5)
+test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmaddsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmsubadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmsubadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fnmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fnmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fnmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask_add_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_add_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_alignr_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_alignr_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_mask_div_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_div_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmaddsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmaddsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmsubadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmsubadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fnmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fnmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fnmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fnmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_i32gather_epi32, __m512i, __m512i, __mmask16, __m512i, void const *, 1)
+test_4 (_mm512_mask_i32gather_epi64, __m512i, __m512i, __mmask8, __m256i, void const *, 1)
+test_4 (_mm512_mask_i32gather_pd, __m512d, __m512d, __mmask8, __m256i, void const *, 1)
+test_4 (_mm512_mask_i32gather_ps, __m512, __m512, __mmask16, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_epi32, __m256i, __m256i, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_epi64, __m512i, __m512i, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_pd, __m512d, __m512d, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_ps, __m256, __m256, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_insertf32x4, __m512, __m512, __mmask16, __m512, __m128, 1)
+test_4 (_mm512_mask_insertf64x4, __m512d, __m512d, __mmask8, __m512d, __m256d, 1)
+test_4 (_mm512_mask_inserti32x4, __m512i, __m512i, __mmask16, __m512i, __m128i, 1)
+test_4 (_mm512_mask_inserti64x4, __m512i, __m512i, __mmask8, __m512i, __m256i, 1)
+test_4 (_mm512_mask_max_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5)
+test_4 (_mm512_mask_max_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5)
+test_4 (_mm512_mask_min_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5)
+test_4 (_mm512_mask_min_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5)
+test_4 (_mm512_mask_mul_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_shuffle_i64x2, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_mask_shuffle_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_shuffle_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_sub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_sub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_ternarylogic_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_ternarylogic_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_maskz_fmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmaddsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmaddsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmsubadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmsubadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fnmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fnmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1)
+test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1)
+test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask_add_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_add_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_cvt_roundsd_ss, __m128, __m128, __mmask8, __m128, __m128d, 1)
+test_4 (_mm_mask_cvt_roundss_sd, __m128d, __m128d, __mmask8, __m128d, __m128, 5)
+test_4 (_mm_mask_div_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_div_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_getexp_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 5)
+test_4 (_mm_mask_getexp_round_ss, __m128, __m128, __mmask8, __m128, __m128, 5)
+test_4y (_mm_mask_getmant_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 1, 5)
+test_4y (_mm_mask_getmant_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 1, 5)
+test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_sqrt_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_sqrt_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_sub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_sub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1)
+test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1)
+test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1)
+test_4v (_mm512_mask_i32scatter_ps, void *, __mmask16, __m512i, __m512, 1)
+test_4v (_mm512_mask_i64scatter_epi32, void *, __mmask8, __m512i, __m256i, 1)
+test_4v (_mm512_mask_i64scatter_epi64, void *, __mmask8, __m512i, __m512i, 1)
+test_4v (_mm512_mask_i64scatter_pd, void *, __mmask8, __m512i, __m512d, 1)
+test_4v (_mm512_mask_i64scatter_ps, void *, __mmask8, __m512i, __m256, 1)
+test_4x (_mm_mask_getmant_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 1)
+test_4x (_mm_mask_getmant_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 1)
+test_4x (_mm_mask_roundscale_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 5)
+test_4x (_mm_mask_roundscale_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 5)
+test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512i, 1, 5)
+test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5)
+test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5)
+test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_ss, __m128, __m128, __mmask8, __m128, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 5)
+
+/* avx512pfintrin.h */
+test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1)
+
+/* avx512erintrin.h */
+test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 1)
+test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rcp28_round_ps, __m512, __m512, 1)
+test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 1)
+test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1)
+
+/* shaintrin.h */
+test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
/* wmmintrin.h */
test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
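
One detail of the additions above worth noting: the scatter and prefetch intrinsics return void, which is why their entries use the v-suffixed generators (test_3v, test_4v, test_3vx). Those macros take no type argument, so the emitted wrapper has no declared return type (implicit int, accepted in the default GNU C dialect these tests build under) and no return statement:

/* test_4v (_mm512_mask_i32scatter_ps, void *, __mmask16, __m512i,
            __m512, 1) yields: */
__mm512_mask_i32scatter_ps (void *A, __mmask16 B, __m512i C, __m512 D,
                            int const I)
{ _mm512_mask_i32scatter_ps (A, B, C, D, 1); }
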
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 8e4c4bd3ebd..eeba83dd86f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -30,6 +30,10 @@
type _CONCAT(_,func) (op1_type A, int const I, int const L) \
{ return func (A, imm1, imm2); }
+#define test_1y(func, type, op1_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, int const I, int const L, int const R)\
+ { return func (A, imm1, imm2, imm3); }
+
#define test_2(func, type, op1_type, op2_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \
{ return func (A, B, imm); }
@@ -38,19 +42,64 @@
type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
{ return func (A, B, imm1, imm2); }
+#define test_2y(func, type, op1_type, op2_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L,\
+ int const R) \
+ { return func (A, B, imm1, imm2, imm3); }
+
+#define test_2vx(func, op1_type, op2_type, imm1, imm2) \
+ _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
+ { func (A, B, imm1, imm2); }
+
#define test_3(func, type, op1_type, op2_type, op3_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, int const I) \
{ return func (A, B, C, imm); }
+#define test_3x(func, type, op1_type, op2_type, op3_type, imm1, imm2) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L) \
+ { return func (A, B, C, imm1, imm2); }
+
+#define test_3y(func, type, op1_type, op2_type, op3_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L, int const R) \
+ { return func (A, B, C, imm1, imm2, imm3); }
+
+#define test_3v(func, op1_type, op2_type, op3_type, imm) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I) \
+ { func (A, B, C, imm); }
+
+#define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, int const I, int const L) \
+ { func (A, B, C, imm1, imm2); }
+
#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \
type _CONCAT(_,func) (op1_type A, op2_type B, \
op3_type C, op4_type D, int const I) \
{ return func (A, B, C, D, imm); }
+#define test_4x(func, type, op1_type, op2_type, op3_type, op4_type, imm1, imm2) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, op4_type D, int const I, int const L) \
+ { return func (A, B, C, D, imm1, imm2); }
+
+#define test_4y(func, type, op1_type, op2_type, op3_type, op4_type, imm1, imm2, imm3) \
+ type _CONCAT(_,func) (op1_type A, op2_type B, op3_type C, \
+ op4_type D, int const I, int const L, int const R) \
+ { return func (A, B, C, D, imm1, imm2, imm3); }
+
+#define test_4v(func, op1_type, op2_type, op3_type, op4_type, imm) \
+ _CONCAT(_,func) (op1_type A, op2_type B, \
+ op3_type C, op4_type D, int const I) \
+ { func (A, B, C, D, imm); }
+
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512pf,avx512er,avx512cd,sha")
#endif
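
Where sse-14.c enables the new ISA extensions on the command line, sse-22.c reaches the same intrinsics through target pragmas: the pragma above turns on avx512f/avx512pf/avx512er/avx512cd/sha for the code that follows, with no -m flags required, and the DIFFERENT_PRAGMAS build applies a separate pragma per header group instead. A minimal standalone sketch of the same mechanism, assuming a compiler (like the GCC being patched here) that honors a target pragma placed ahead of the header include:

/* No -mavx512f on the command line; the pragma enables it.  */
#pragma GCC target ("avx512f")
#include <immintrin.h>

__m512d
add_round (__m512d x)
{
  /* The immediate is a literal, so this compiles even at -O0,
     where the intrinsic is a macro demanding a constant.  */
  return _mm512_add_round_pd (x, x, 1);
}
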
/* Following intrinsics require immediate arguments. They
@@ -163,9 +212,9 @@ test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1)
test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
-/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM) */
+/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
@@ -248,6 +297,472 @@ test_2 ( _mm256_i64gather_epi32, __m128i, int const *, __m256i, 1)
/* rtmintrin.h */
test_0 ( _xabort, void, 1)
+/* avx512fintrin.h */
+test_1 (_mm512_cvt_roundepi32_ps, __m512, __m512i, 1)
+test_1 (_mm512_cvt_roundepu32_ps, __m512, __m512i, 1)
+test_1 (_mm512_cvt_roundpd_epi32, __m256i, __m512d, 1)
+test_1 (_mm512_cvt_roundpd_epu32, __m256i, __m512d, 1)
+test_1 (_mm512_cvt_roundpd_ps, __m256, __m512d, 1)
+test_1 (_mm512_cvt_roundph_ps, __m512, __m256i, 5)
+test_1 (_mm512_cvt_roundps_epi32, __m512i, __m512, 1)
+test_1 (_mm512_cvt_roundps_epu32, __m512i, __m512, 1)
+test_1 (_mm512_cvt_roundps_pd, __m512d, __m256, 5)
+test_1 (_mm512_cvtps_ph, __m256i, __m512, 1)
+test_1 (_mm512_cvtt_roundpd_epi32, __m256i, __m512d, 5)
+test_1 (_mm512_cvtt_roundpd_epu32, __m256i, __m512d, 5)
+test_1 (_mm512_cvtt_roundps_epi32, __m512i, __m512, 5)
+test_1 (_mm512_cvtt_roundps_epu32, __m512i, __m512, 5)
+test_1 (_mm512_extractf32x4_ps, __m128, __m512, 1)
+test_1 (_mm512_extractf64x4_pd, __m256d, __m512d, 1)
+test_1 (_mm512_extracti32x4_epi32, __m128i, __m512i, 1)
+test_1 (_mm512_extracti64x4_epi64, __m256i, __m512i, 1)
+test_1 (_mm512_getexp_round_pd, __m512d, __m512d, 5)
+test_1 (_mm512_getexp_round_ps, __m512, __m512, 5)
+test_1y (_mm512_getmant_round_pd, __m512d, __m512d, 1, 1, 5)
+test_1y (_mm512_getmant_round_ps, __m512, __m512, 1, 1, 5)
+test_1 (_mm512_permute_pd, __m512d, __m512d, 1)
+test_1 (_mm512_permute_ps, __m512, __m512, 1)
+test_1 (_mm512_permutex_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_permutex_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rol_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_rol_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_ror_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_ror_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_shuffle_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_slli_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_slli_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_sqrt_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_sqrt_round_ps, __m512, __m512, 1)
+test_1 (_mm512_srai_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_srai_epi64, __m512i, __m512i, 1)
+test_1 (_mm512_srli_epi32, __m512i, __m512i, 1)
+test_1 (_mm512_srli_epi64, __m512i, __m512i, 1)
+test_1 (_mm_cvt_roundsd_i32, int, __m128d, 1)
+test_1 (_mm_cvt_roundsd_u32, unsigned, __m128d, 1)
+test_1 (_mm_cvt_roundss_i32, int, __m128, 1)
+test_1 (_mm_cvt_roundss_u32, unsigned, __m128, 1)
+test_1 (_mm_cvtt_roundsd_i32, int, __m128d, 5)
+test_1 (_mm_cvtt_roundsd_u32, unsigned, __m128d, 5)
+test_1 (_mm_cvtt_roundss_i32, int, __m128, 5)
+test_1 (_mm_cvtt_roundss_u32, unsigned, __m128, 5)
+test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1)
+test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1)
+test_1x (_mm_cvt_roundi32_ss, __m128, __m128, 1, 1)
+test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_alignr_epi64, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epi32_mask, __mmask16, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epi64_mask, __mmask8, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epu32_mask, __mmask16, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_epu64_mask, __mmask8, __m512i, __m512i, 1)
+test_2 (_mm512_cmp_pd_mask, __mmask8, __m512d, __m512d, 1)
+test_2 (_mm512_cmp_ps_mask, __mmask16, __m512, __m512, 1)
+test_2 (_mm512_div_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_div_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_i32gather_epi32, __m512i, __m512i, void const *, 1)
+test_2 (_mm512_i32gather_epi64, __m512i, __m256i, void const *, 1)
+test_2 (_mm512_i32gather_pd, __m512d, __m256i, void const *, 1)
+test_2 (_mm512_i32gather_ps, __m512, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_epi32, __m256i, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_epi64, __m512i, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_pd, __m512d, __m512i, void const *, 1)
+test_2 (_mm512_i64gather_ps, __m256, __m512i, void const *, 1)
+test_2 (_mm512_insertf32x4, __m512, __m512, __m128, 1)
+test_2 (_mm512_insertf64x4, __m512d, __m512d, __m256d, 1)
+test_2 (_mm512_inserti32x4, __m512i, __m512i, __m128i, 1)
+test_2 (_mm512_inserti64x4, __m512i, __m512i, __m256i, 1)
+test_2 (_mm512_maskz_cvt_roundepi32_ps, __m512, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_cvt_roundepu32_ps, __m512, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_cvt_roundpd_epi32, __m256i, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundpd_epu32, __m256i, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundpd_ps, __m256, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_cvt_roundph_ps, __m512, __mmask16, __m256i, 5)
+test_2 (_mm512_maskz_cvt_roundps_epi32, __m512i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvt_roundps_epu32, __m512i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvt_roundps_pd, __m512d, __mmask8, __m256, 5)
+test_2 (_mm512_maskz_cvtps_ph, __m256i, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_cvtt_roundpd_epi32, __m256i, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_cvtt_roundpd_epu32, __m256i, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_cvtt_roundps_epi32, __m512i, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_cvtt_roundps_epu32, __m512i, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_extractf32x4_ps, __m128, __mmask8, __m512, 1)
+test_2 (_mm512_maskz_extractf64x4_pd, __m256d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_extracti32x4_epi32, __m128i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_extracti64x4_epi64, __m256i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_getexp_round_pd, __m512d, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_getexp_round_ps, __m512, __mmask16, __m512, 5)
+test_2y (_mm512_maskz_getmant_round_pd, __m512d, __mmask8, __m512d, 1, 1, 5)
+test_2y (_mm512_maskz_getmant_round_ps, __m512, __mmask16, __m512, 1, 1, 5)
+test_2 (_mm512_maskz_permute_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_permute_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_permutex_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_permutex_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rol_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_rol_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_ror_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_ror_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_shuffle_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_slli_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_slli_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_sqrt_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_sqrt_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_srai_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_srai_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_maskz_srli_epi32, __m512i, __mmask16, __m512i, 1)
+test_2 (_mm512_maskz_srli_epi64, __m512i, __mmask8, __m512i, 1)
+test_2 (_mm512_max_round_pd, __m512d, __m512d, __m512d, 5)
+test_2 (_mm512_max_round_ps, __m512, __m512, __m512, 5)
+test_2 (_mm512_min_round_pd, __m512d, __m512d, __m512d, 5)
+test_2 (_mm512_min_round_ps, __m512, __m512, __m512, 5)
+test_2 (_mm512_mul_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_mul_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_scalef_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_scalef_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_shuffle_f32x4, __m512, __m512, __m512, 1)
+test_2 (_mm512_shuffle_f64x2, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_shuffle_i32x4, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_shuffle_i64x2, __m512i, __m512i, __m512i, 1)
+test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1)
+test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1)
+test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1)
+test_2 (_mm_add_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_add_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1)
+test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1)
+#ifdef __x86_64__
+test_2 (_mm_cvt_roundi64_sd, __m128d, __m128d, long long, 1)
+test_2 (_mm_cvt_roundi64_ss, __m128, __m128, long long, 1)
+#endif
+test_2 (_mm_cvt_roundsd_ss, __m128, __m128, __m128d, 1)
+test_2 (_mm_cvt_roundss_sd, __m128d, __m128d, __m128, 5)
+test_2 (_mm_cvt_roundu32_ss, __m128, __m128, unsigned, 1)
+#ifdef __x86_64__
+test_2 (_mm_cvt_roundu64_sd, __m128d, __m128d, unsigned long long, 1)
+test_2 (_mm_cvt_roundu64_ss, __m128, __m128, unsigned long long, 1)
+#endif
+test_2 (_mm_div_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_div_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_getexp_round_sd, __m128d, __m128d, __m128d, 5)
+test_2 (_mm_getexp_round_ss, __m128, __m128, __m128, 5)
+test_2y (_mm_getmant_round_sd, __m128d, __m128d, __m128d, 1, 1, 5)
+test_2y (_mm_getmant_round_ss, __m128, __m128, __m128, 1, 1, 5)
+test_2 (_mm_mul_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_mul_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_scalef_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_scalef_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sqrt_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sqrt_round_ss, __m128, __m128, __m128, 1)
+test_2 (_mm_sub_round_sd, __m128d, __m128d, __m128d, 1)
+test_2 (_mm_sub_round_ss, __m128, __m128, __m128, 1)
+test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5)
+test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5)
+test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5)
+test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5)
+test_2x (_mm_cmp_round_sd_mask, __mmask8, __m128d, __m128d, 1, 5)
+test_2x (_mm_cmp_round_ss_mask, __mmask8, __m128, __m128, 1, 5)
+test_2x (_mm_comi_round_sd, int, __m128d, __m128d, 1, 5)
+test_2x (_mm_comi_round_ss, int, __m128, __m128, 1, 5)
+test_2x (_mm_roundscale_round_sd, __m128d, __m128d, __m128d, 1, 5)
+test_2x (_mm_roundscale_round_ss, __m128, __m128, __m128, 1, 5)
+test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmaddsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fmsubadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fmsubadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fnmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fnmadd_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
+test_3 (_mm512_fnmsub_round_ps, __m512, __m512, __m512, __m512, 1)
+test_3 (_mm512_mask_cmp_epi32_mask, __mmask16, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epi64_mask, __mmask8, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epu32_mask, __mmask16, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_epu64_mask, __mmask8, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_mask_cmp_pd_mask, __mmask8, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_mask_cmp_ps_mask, __mmask16, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_mask_cvt_roundepi32_ps, __m512, __m512, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_cvt_roundepu32_ps, __m512, __m512, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_cvt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundpd_ps, __m256, __m256, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_cvt_roundph_ps, __m512, __m512, __mmask16, __m256i, 5)
+test_3 (_mm512_mask_cvt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvt_roundps_pd, __m512d, __m512d, __mmask8, __m256, 5)
+test_3 (_mm512_mask_cvtps_ph, __m256i, __m256i, __mmask16, __m512, 1)
+test_3 (_mm512_mask_cvtt_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_cvtt_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_cvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 5)
+test_3 (_mm512_mask_cvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 5)
+test_3 (_mm512_mask_extractf32x4_ps, __m128, __m128, __mmask8, __m512, 1)
+test_3 (_mm512_mask_extractf64x4_pd, __m256d, __m256d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_extracti32x4_epi32, __m128i, __m128i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_extracti64x4_epi64, __m256i, __m256i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_getexp_round_pd, __m512d, __m512d, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_getexp_round_ps, __m512, __m512, __mmask16, __m512, 5)
+test_3y (_mm512_mask_getmant_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 1, 5)
+test_3y (_mm512_mask_getmant_round_ps, __m512, __m512, __mmask16, __m512, 1, 1, 5)
+test_3 (_mm512_mask_permute_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_permute_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_permutex_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_permutex_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rol_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_rol_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_ror_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_ror_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_shuffle_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_slli_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_slli_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_sqrt_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_sqrt_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_srai_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_srai_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_mask_srli_epi32, __m512i, __m512i, __mmask16, __m512i, 1)
+test_3 (_mm512_mask_srli_epi64, __m512i, __m512i, __mmask8, __m512i, 1)
+test_3 (_mm512_maskz_add_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_add_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_alignr_epi32, __m512i, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_alignr_epi64, __m512i, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_div_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_div_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_insertf32x4, __m512, __mmask16, __m512, __m128, 1)
+test_3 (_mm512_maskz_insertf64x4, __m512d, __mmask8, __m512d, __m256d, 1)
+test_3 (_mm512_maskz_inserti32x4, __m512i, __mmask16, __m512i, __m128i, 1)
+test_3 (_mm512_maskz_inserti64x4, __m512i, __mmask8, __m512i, __m256i, 1)
+test_3 (_mm512_maskz_max_round_pd, __m512d, __mmask8, __m512d, __m512d, 5)
+test_3 (_mm512_maskz_max_round_ps, __m512, __mmask16, __m512, __m512, 5)
+test_3 (_mm512_maskz_min_round_pd, __m512d, __mmask8, __m512d, __m512d, 5)
+test_3 (_mm512_maskz_min_round_ps, __m512, __mmask16, __m512, __m512, 5)
+test_3 (_mm512_maskz_mul_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_shuffle_i64x2, __m512i, __mmask8, __m512i, __m512i, 1)
+test_3 (_mm512_maskz_shuffle_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_shuffle_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
+test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1)
+test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1)
+test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1)
+test_3 (_mm_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fmsub_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmadd_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
+test_3 (_mm_fnmsub_round_ss, __m128, __m128, __m128, __m128, 1)
+test_3 (_mm_mask_cmp_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_add_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_add_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_cvt_roundsd_ss, __m128, __mmask8, __m128, __m128d, 1)
+test_3 (_mm_maskz_cvt_roundss_sd, __m128d, __mmask8, __m128d, __m128, 5)
+test_3 (_mm_maskz_div_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_div_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_getexp_round_sd, __m128d, __mmask8, __m128d, __m128d, 5)
+test_3 (_mm_maskz_getexp_round_ss, __m128, __mmask8, __m128, __m128, 5)
+test_3y (_mm_maskz_getmant_round_sd, __m128d, __mmask8, __m128d, __m128d, 1, 1, 5)
+test_3y (_mm_maskz_getmant_round_ss, __m128, __mmask8, __m128, __m128, 1, 1, 5)
+test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_sqrt_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_sqrt_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3 (_mm_maskz_sub_round_sd, __m128d, __mmask8, __m128d, __m128d, 1)
+test_3 (_mm_maskz_sub_round_ss, __m128, __mmask8, __m128, __m128, 1)
+test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1)
+test_3v (_mm512_i32scatter_epi64, void *, __m256i, __m512i, 1)
+test_3v (_mm512_i32scatter_pd, void *, __m256i, __m512d, 1)
+test_3v (_mm512_i32scatter_ps, void *, __m512i, __m512, 1)
+test_3v (_mm512_i64scatter_epi32, void *, __m512i, __m256i, 1)
+test_3v (_mm512_i64scatter_epi64, void *, __m512i, __m512i, 1)
+test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1)
+test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1)
+test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5)
+test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5)
+test_3x (_mm512_mask_cmp_round_pd_mask, __mmask8, __mmask8, __m512d, __m512d, 1, 5)
+test_3x (_mm512_mask_cmp_round_ps_mask, __mmask16, __mmask16, __m512, __m512, 1, 5)
+test_3x (_mm_fixupimm_round_sd, __m128d, __m128d, __m128d, __m128i, 1, 5)
+test_3x (_mm_fixupimm_round_ss, __m128, __m128, __m128, __m128i, 1, 5)
+test_3x (_mm_mask_cmp_round_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1, 5)
+test_3x (_mm_mask_cmp_round_ss_mask, __mmask8, __mmask8, __m128, __m128, 1, 5)
+test_3x (_mm_maskz_roundscale_round_sd, __m128d, __mmask8, __m128d, __m128d, 1, 5)
+test_3x (_mm_maskz_roundscale_round_ss, __m128, __mmask8, __m128, __m128, 1, 5)
+test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmaddsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fmsubadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fmsubadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fnmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fnmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask3_fnmsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
+test_4 (_mm512_mask3_fnmsub_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
+test_4 (_mm512_mask_add_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_add_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_alignr_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_alignr_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_mask_div_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_div_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmaddsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmaddsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fmsubadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fmsubadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fnmadd_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fnmadd_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_fnmsub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_fnmsub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_i32gather_epi32, __m512i, __m512i, __mmask16, __m512i, void const *, 1)
+test_4 (_mm512_mask_i32gather_epi64, __m512i, __m512i, __mmask8, __m256i, void const *, 1)
+test_4 (_mm512_mask_i32gather_pd, __m512d, __m512d, __mmask8, __m256i, void const *, 1)
+test_4 (_mm512_mask_i32gather_ps, __m512, __m512, __mmask16, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_epi32, __m256i, __m256i, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_epi64, __m512i, __m512i, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_pd, __m512d, __m512d, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_i64gather_ps, __m256, __m256, __mmask8, __m512i, void const *, 1)
+test_4 (_mm512_mask_insertf32x4, __m512, __m512, __mmask16, __m512, __m128, 1)
+test_4 (_mm512_mask_insertf64x4, __m512d, __m512d, __mmask8, __m512d, __m256d, 1)
+test_4 (_mm512_mask_inserti32x4, __m512i, __m512i, __mmask16, __m512i, __m128i, 1)
+test_4 (_mm512_mask_inserti64x4, __m512i, __m512i, __mmask8, __m512i, __m256i, 1)
+test_4 (_mm512_mask_max_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5)
+test_4 (_mm512_mask_max_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5)
+test_4 (_mm512_mask_min_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 5)
+test_4 (_mm512_mask_min_round_ps, __m512, __m512, __mmask16, __m512, __m512, 5)
+test_4 (_mm512_mask_mul_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_shuffle_i64x2, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_mask_shuffle_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_shuffle_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_sub_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
+test_4 (_mm512_mask_sub_round_ps, __m512, __m512, __mmask16, __m512, __m512, 1)
+test_4 (_mm512_mask_ternarylogic_epi32, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
+test_4 (_mm512_mask_ternarylogic_epi64, __m512i, __m512i, __mmask8, __m512i, __m512i, 1)
+test_4 (_mm512_maskz_fmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmaddsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmaddsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fmsubadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fmsubadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fnmadd_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fnmadd_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_fnmsub_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512d, 1)
+test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, __m512, 1)
+test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, __m512i, __m512i, 1)
+test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, __m512i, __m512i, 1)
+test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, __mmask8, 1)
+test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, 1)
+test_4 (_mm_mask_add_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_add_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_cvt_roundsd_ss, __m128, __m128, __mmask8, __m128, __m128d, 1)
+test_4 (_mm_mask_cvt_roundss_sd, __m128d, __m128d, __mmask8, __m128d, __m128, 5)
+test_4 (_mm_mask_div_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_div_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_getexp_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 5)
+test_4 (_mm_mask_getexp_round_ss, __m128, __m128, __mmask8, __m128, __m128, 5)
+test_4y (_mm_mask_getmant_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 1, 5)
+test_4y (_mm_mask_getmant_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 1, 5)
+test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_sqrt_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_sqrt_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_mask_sub_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1)
+test_4 (_mm_mask_sub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1)
+test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128d, 1)
+test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, 1)
+test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, 1)
+test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1)
+test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1)
+test_4v (_mm512_mask_i32scatter_ps, void *, __mmask16, __m512i, __m512, 1)
+test_4v (_mm512_mask_i64scatter_epi32, void *, __mmask8, __m512i, __m256i, 1)
+test_4v (_mm512_mask_i64scatter_epi64, void *, __mmask8, __m512i, __m512i, 1)
+test_4v (_mm512_mask_i64scatter_pd, void *, __mmask8, __m512i, __m512d, 1)
+test_4v (_mm512_mask_i64scatter_ps, void *, __mmask8, __m512i, __m256, 1)
+test_4x (_mm_mask_getmant_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 1)
+test_4x (_mm_mask_getmant_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 1)
+test_4x (_mm_mask_roundscale_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 1, 5)
+test_4x (_mm_mask_roundscale_round_ss, __m128, __m128, __mmask8, __m128, __m128, 1, 5)
+test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512i, 1, 5)
+test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5)
+test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5)
+test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128i, 1, 5)
+test_4x (_mm_mask_fixupimm_round_ss, __m128, __m128, __mmask8, __m128, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128i, 1, 5)
+test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 5)
+
+/* avx512pfintrin.h */
+test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1)
+
+/* avx512erintrin.h */
+test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 1)
+test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rcp28_round_ps, __m512, __m512, 1)
+test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 1)
+test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 1)
+test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 1)
+test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 1)
+test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
+test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1)
+
+/* shaintrin.h */
+test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
+
/* wmmintrin.h (AES/PCLMUL). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("aes,pclmul")
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 069f8e7cb80..a1dafc87eb6 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -183,7 +183,211 @@
/* rtmintrin.h */
#define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt")
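+/* These builtins require compile-time constants for their immediate
+   (and rounding/SAE) operands, so each macro below pins those operands
+   to literal values the builtin accepts.  */
+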
+/* avx512fintrin.h */
+#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_addsd_mask(A, B, C, D, E) __builtin_ia32_addsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_addss_mask(A, B, C, D, E) __builtin_ia32_addss_mask(A, B, C, D, 1)
+#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpsd_mask(A, B, F, D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 5)
+#define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) __builtin_ia32_cvtpd2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1)
+#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
+#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
+#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
+#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5)
+#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 5)
+#define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 1)
+#define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 1)
+#define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 1)
+#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
+#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_divsd_mask(A, B, C, D, E) __builtin_ia32_divsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_divss_mask(A, B, C, D, E) __builtin_ia32_divss_mask(A, B, C, D, 1)
+#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 5)
+#define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 5)
+#define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
+#define __builtin_ia32_getexpsd128_mask(A, B, C, D, E) __builtin_ia32_getexpsd128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getexpss128_mask(A, B, C, D, E) __builtin_ia32_getexpss128_mask(A, B, C, D, 5)
+#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
+#define __builtin_ia32_getmantsd_mask(A, B, I, D, E, F) __builtin_ia32_getmantsd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_getmantss_mask(A, B, I, D, E, F) __builtin_ia32_getmantss_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxsd_mask(A, B, C, D, E) __builtin_ia32_maxsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_maxss_mask(A, B, C, D, E) __builtin_ia32_maxss_mask(A, B, C, D, 5)
+#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
+#define __builtin_ia32_minsd_mask(A, B, C, D, E) __builtin_ia32_minsd_mask(A, B, C, D, 5)
+#define __builtin_ia32_minss_mask(A, B, C, D, E) __builtin_ia32_minss_mask(A, B, C, D, 5)
+#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulsd_mask(A, B, C, D, E) __builtin_ia32_mulsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_mulss_mask(A, B, C, D, E) __builtin_ia32_mulss_mask(A, B, C, D, 1)
+#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
+#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
+#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
+#define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D)
+#define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D)
+#define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
+#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 5)
+#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefsd_mask(A, B, C, D, E) __builtin_ia32_scalefsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_scalefss_mask(A, B, C, D, E) __builtin_ia32_scalefss_mask(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
+#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
+#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 1)
+#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_subsd_mask(A, B, C, D, E) __builtin_ia32_subsd_mask(A, B, C, D, 1)
+#define __builtin_ia32_subss_mask(A, B, C, D, E) __builtin_ia32_subss_mask(A, B, C, D, 1)
+#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
+#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
+#define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 5)
+#define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 5)
+#define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 1)
+#define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 1)
+#define __builtin_ia32_vcvtsd2usi32(A, B) __builtin_ia32_vcvtsd2usi32(A, 1)
+#define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 1)
+#define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 1)
+#define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 1)
+#define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 1)
+#define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 1)
+#define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 5)
+#define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 5)
+#define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 5)
+#define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 5)
+#define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 5)
+#define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 5)
+#define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 5)
+#define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 5)
+#define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 1)
+#define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 1)
+#define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
+
+/* avx512pfintrin.h */
+#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+
+/* avx512erintrin.h */
+#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 1)
+#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 1)
+#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 1)
+#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 1)
+#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 1)
+#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 1)
+
+/* shaintrin.h */
+#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512pf,avx512cd,sha")
#include <wmmintrin.h>
#include <smmintrin.h>
#include <mm3dnow.h>
diff --git a/gcc/testsuite/gcc.target/i386/testimm-10.c b/gcc/testsuite/gcc.target/i386/testimm-10.c
new file mode 100644
index 00000000000..4d1c8efde2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testimm-10.c
@@ -0,0 +1,208 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512f" } */
+
+#include <x86intrin.h>
+
+__m512i m512i;
+__m512d m512d;
+__m512 m512;
+__m256i m256i;
+__m256d m256d;
+__m256 m256;
+__m128i m128i;
+__m128d m128d;
+__m128 m128;
+__mmask8 mmask8;
+__mmask16 mmask16;
+
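+/* Each call below passes an immediate outside the range the intrinsic
+   accepts; the dg-error directives match the expected diagnostics.  */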
+void
+test8bit (void)
+{
+ m512i = _mm512_permutex_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_permutex_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_permutex_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_ternarylogic_epi64 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_ternarylogic_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_ternarylogic_epi64 (mmask8, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_ternarylogic_epi32 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_ternarylogic_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_ternarylogic_epi32 (mmask16, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_shuffle_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_shuffle_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_shuffle_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_shuffle_i64x2 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_shuffle_i64x2 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_shuffle_i64x2 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_shuffle_i32x4 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_shuffle_i32x4 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_shuffle_i32x4 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_shuffle_f64x2 (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_mask_shuffle_f64x2 (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_maskz_shuffle_f64x2 (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512 = _mm512_shuffle_f32x4 (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_mask_shuffle_f32x4 (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_maskz_shuffle_f32x4 (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_permutex_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_mask_permutex_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_maskz_permutex_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_permute_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_mask_permute_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_maskz_permute_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512 = _mm512_permute_ps (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_mask_permute_ps (m512, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_maskz_permute_ps (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_shuffle_pd (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_mask_shuffle_pd (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512d = _mm512_maskz_shuffle_pd (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512 = _mm512_shuffle_ps (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_mask_shuffle_ps (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512 = _mm512_maskz_shuffle_ps (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_fixupimm_pd (m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m512d = _mm512_mask_fixupimm_pd (m512d, mmask8, m512d, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m512d = _mm512_maskz_fixupimm_pd (mmask8, m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+
+ m512 = _mm512_fixupimm_ps (m512, m512, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m512 = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m512 = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+
+ m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+
+ m128 = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m128 = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+ m128 = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be 8-bit immediate." } */
+
+ m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_rol_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_ror_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_ror_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_ror_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_rol_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_rol_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_rol_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512i = _mm512_ror_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_ror_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_ror_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m256i = _mm512_cvtps_ph (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m256i = _mm512_mask_cvtps_ph (m256i, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m256i = _mm512_maskz_cvtps_ph (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ m512d = _mm512_roundscale_pd (m512d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m512d = _mm512_mask_roundscale_pd (m512d, mmask8, m512d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m512d = _mm512_maskz_roundscale_pd (mmask8, m512d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+
+ m512 = _mm512_roundscale_ps (m512, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m512 = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m512 = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+
+ m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m128d = _mm_mask_roundscale_sd (m128d, mmask8, m128d, m128d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m128d = _mm_maskz_roundscale_sd (mmask8, m128d, m128d, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m128 = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m128 = _mm_mask_roundscale_ss (m128, mmask8, m128, m128, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+ m128 = _mm_maskz_roundscale_ss (mmask8, m128, m128, 256); /* { dg-error "the immediate argument must be 8-bit immediate" } */
+
+ m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_alignr_epi32 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_alignr_epi64 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_mask_alignr_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+ m512i = _mm512_maskz_alignr_epi64 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
+
+ mmask8 = _mm512_cmp_epi64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_cmp_epi32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_cmp_epu64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_cmp_epu32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_cmp_pd_mask (m512d, m512d, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm512_cmp_ps_mask (m512, m512, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm512_mask_cmp_epi64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_mask_cmp_epi32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_mask_cmp_epu64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_mask_cmp_epu32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
+ mmask8 = _mm512_mask_cmp_pd_mask (2, m512d, m512d, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm512_mask_cmp_ps_mask (2, m512, m512, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm_cmp_sd_mask (m128d, m128d, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm_cmp_ss_mask (m128, m128, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm_mask_cmp_sd_mask (1, m128d, m128d, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+ mmask8 = _mm_mask_cmp_ss_mask (1, m128, m128, 256); /* { dg-error "the immediate argument must be 5-bit immediate." } */
+}
+
+void
+test1bit (void)
+{
+ m256d = _mm512_extractf64x4_pd (m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m256d = _mm512_mask_extractf64x4_pd (m256d, mmask8, m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m256d = _mm512_maskz_extractf64x4_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+
+ m256i = _mm512_extracti64x4_epi64 (m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m256i = _mm512_mask_extracti64x4_epi64 (m256i, mmask8, m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m256i = _mm512_maskz_extracti64x4_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+
+ m512d = _mm512_insertf64x4 (m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m512d = _mm512_mask_insertf64x4 (m512d, mmask8, m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m512d = _mm512_maskz_insertf64x4 (mmask8, m512d, m256d, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+
+ m512i = _mm512_inserti64x4 (m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m512i = _mm512_mask_inserti64x4 (m512i, mmask8, m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+ m512i = _mm512_maskz_inserti64x4 (mmask8, m512i, m256i, 256); /* { dg-error "the last argument must be a 1-bit immediate" } */
+}
+
+void
+test2bit (void)
+{
+ m128 = _mm512_extractf32x4_ps(m512, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m128 = _mm512_mask_extractf32x4_ps(m128, mmask8, m512, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m128 = _mm512_maskz_extractf32x4_ps(mmask8, m512, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+
+ m128i = _mm512_extracti32x4_epi32 (m512i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m128i = _mm512_mask_extracti32x4_epi32 (m128i, mmask8, m512i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m128i = _mm512_maskz_extracti32x4_epi32 (mmask8, m512i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+
+ m512 = _mm512_insertf32x4 (m512, m128, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m512 = _mm512_mask_insertf32x4 (m512, mmask16, m512, m128, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m512 = _mm512_maskz_insertf32x4 (mmask16, m512, m128, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+
+ m512i = _mm512_inserti32x4 (m512i, m128i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m512i = _mm512_mask_inserti32x4 (m512i, mmask16, m512i, m128i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+ m512i = _mm512_maskz_inserti32x4 (mmask16, m512i, m128i, 256); /* { dg-error "the last argument must be a 2-bit immediate" } */
+}
+
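+/* getmant packs a 2-bit normalization interval and a 2-bit sign control
+   into a single 4-bit immediate, so 64 cannot be encoded.  */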
+void
+test4bit (void)
+{
+ m512d = _mm512_getmant_pd (m512d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m512d = _mm512_mask_getmant_pd (m512d, mmask8, m512d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m512d = _mm512_maskz_getmant_pd (mmask8, m512d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+
+ m512 = _mm512_getmant_ps (m512, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m512 = _mm512_mask_getmant_ps (m512, mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m512 = _mm512_maskz_getmant_ps (mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+
+ m128d = _mm_getmant_sd (m128d, m128d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m128d = _mm_mask_getmant_sd (m128d, mmask8, m128d, m128d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m128d = _mm_maskz_getmant_sd (mmask8, m128d, m128d, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+
+ m128 = _mm_getmant_ss (m128, m128, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m128 = _mm_mask_getmant_ss (m128, mmask8, m128, m128, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+ m128 = _mm_maskz_getmant_ss (mmask8, m128, m128, 1, 64); /* { dg-error "the immediate argument must be 4-bit immediate." } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/testround-1.c b/gcc/testsuite/gcc.target/i386/testround-1.c
new file mode 100644
index 00000000000..3730c529f86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testround-1.c
@@ -0,0 +1,653 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512f" } */
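+
+/* Every call below passes an immediate that is not a valid embedded
+   rounding operand, so each line must produce the "incorrect rounding
+   operand" diagnostic.  A valid operand is _MM_FROUND_CUR_DIRECTION (4)
+   or one of the rounding modes 0..3 OR'ed with _MM_FROUND_NO_EXC (8),
+   i.e. 8..11; SAE-only intrinsics accept just 4 or 8.  */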
+
+#include <x86intrin.h>
+
+int i;
+unsigned int ui;
+__m512i m512i;
+__m512d m512d;
+__m512 m512;
+__m256i m256i;
+__m256 m256;
+__m128i m128i;
+__m128d m128d;
+__m128 m128;
+__mmask8 mmask8;
+__mmask16 mmask16;
+
+void
+test_round (void)
+{
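+ /* 7 == _MM_FROUND_TO_ZERO | _MM_FROUND_CUR_DIRECTION: a rounding mode
+    may only be combined with _MM_FROUND_NO_EXC, so 7 must be rejected.  */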
+ m128d = _mm_add_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_add_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_add_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_add_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_add_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_add_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_sub_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_sub_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_sub_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_sub_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_sub_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_sub_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_sqrt_round_pd (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_sqrt_round_pd (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_sqrt_round_ps (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_sqrt_round_ps (m512, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_sqrt_round_ps (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_sqrt_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_sqrt_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_sqrt_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_sqrt_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_sqrt_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_sqrt_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_add_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_add_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_add_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_add_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_add_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_add_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_sub_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_sub_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_sub_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_sub_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_sub_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_sub_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_mul_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_mul_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_mul_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mul_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_mul_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_mul_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_div_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_div_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_div_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_div_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mul_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_mul_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_mul_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mul_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_mul_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_mul_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_div_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_div_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_div_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_div_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_div_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_div_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_scalef_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_scalef_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_scalef_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_scalef_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_scalef_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_scalef_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_scalef_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_scalef_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_scalef_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_scalef_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_scalef_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_scalef_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_fmadd_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmadd_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmadd_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmadd_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmadd_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmadd_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmadd_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmadd_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmsub_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmsub_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmsub_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmsub_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmsub_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmsub_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmsub_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmsub_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmaddsub_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmaddsub_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmaddsub_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmaddsub_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmaddsub_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmaddsub_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmaddsub_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmaddsub_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmsubadd_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmsubadd_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmsubadd_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmsubadd_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmsubadd_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmsubadd_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmsubadd_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmsubadd_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fnmadd_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fnmadd_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fnmadd_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fnmadd_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fnmadd_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fnmadd_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fnmadd_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fnmadd_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fnmsub_round_pd (m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fnmsub_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fnmsub_round_pd (m512d, m512d, m512d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fnmsub_round_pd (mmask8, m512d, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fnmsub_round_ps (m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fnmsub_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fnmsub_round_ps (m512, m512, m512, mmask16, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fnmsub_round_ps (mmask16, m512, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m256i = _mm512_cvt_roundpd_epi32 (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvt_roundpd_epi32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvt_roundpd_epi32 (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_cvt_roundpd_epu32 (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvt_roundpd_epu32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvt_roundpd_epu32 (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512i = _mm512_cvt_roundps_epi32 (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvt_roundps_epi32 (m512i, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvt_roundps_epi32 (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_cvt_roundps_epu32 (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvt_roundps_epu32 (m512i, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvt_roundps_epu32 (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_cvt_roundu32_ss (m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundi32_ss (m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512 = _mm512_cvt_roundepi32_ps (m512i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundepi32_ps (m512, mmask16, m512i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundepi32_ps (mmask16, m512i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_cvt_roundepu32_ps (m512i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundepu32_ps (m512, mmask16, m512i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundepu32_ps (mmask16, m512i, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvt_roundss_u32 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvt_roundss_i32 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvt_roundsd_u32 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvt_roundsd_i32 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m256 = _mm512_cvt_roundpd_ps (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256 = _mm512_mask_cvt_roundpd_ps (m256, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256 = _mm512_maskz_cvt_roundpd_ps (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundsd_ss (m128, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_cvt_roundsd_ss (m128, mmask8, m128, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_cvt_roundsd_ss (mmask8, m128, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128d = _mm_fmadd_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fmadd_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fmadd_round_sd (m128d, m128d, m128d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fmadd_round_sd (mmask8, m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fmadd_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fmadd_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fmadd_round_ss (m128, m128, m128, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fmadd_round_ss (mmask8, m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fmsub_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fmsub_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fmsub_round_sd (m128d, m128d, m128d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fmsub_round_sd (mmask8, m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fmsub_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fmsub_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fmsub_round_ss (m128, m128, m128, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fmsub_round_ss (mmask8, m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fnmadd_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fnmadd_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fnmadd_round_sd (m128d, m128d, m128d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fnmadd_round_sd (mmask8, m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fnmadd_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fnmadd_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fnmadd_round_ss (m128, m128, m128, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fnmadd_round_ss (mmask8, m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fnmsub_round_sd (m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fnmsub_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fnmsub_round_sd (m128d, m128d, m128d, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fnmsub_round_sd (mmask8, m128d, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fnmsub_round_ss (m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fnmsub_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fnmsub_round_ss (m128, m128, m128, mmask8, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fnmsub_round_ss (mmask8, m128, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_max_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_max_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_max_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_max_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_min_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_min_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m256i = _mm512_cvtt_roundpd_epi32 (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvtt_roundpd_epi32 (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_cvtt_roundpd_epu32 (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvtt_roundpd_epu32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvtt_roundpd_epu32 (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512i = _mm512_cvtt_roundps_epi32 (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvtt_roundps_epi32 (m512i, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvtt_roundps_epi32 (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_cvtt_roundps_epu32 (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvtt_roundps_epu32 (m512i, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvtt_roundps_epu32 (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+
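+ /* The leading immediate (4) is a valid fixupimm table selector; only
+    the trailing rounding operand 7 is wrong.  */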
+ m512d = _mm512_fixupimm_round_pd (m512d, m512d, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fixupimm_round_pd (m512d, mmask8, m512d, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fixupimm_round_pd (mmask8, m512d, m512d, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fixupimm_round_ps (m512, m512, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fixupimm_round_ps (m512, mmask16, m512, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fixupimm_round_ps (mmask16, m512, m512, m512i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fixupimm_round_sd (m128d, m128d, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fixupimm_round_sd (m128d, mmask8, m128d, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fixupimm_round_sd (mmask8, m128d, m128d, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fixupimm_round_ss (m128, m128, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fixupimm_round_ss (m128, mmask8, m128, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fixupimm_round_ss (mmask8, m128, m128, m128i, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvtt_roundss_u32 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvtt_roundss_i32 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvtt_roundsd_u32 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvtt_roundsd_i32 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_cvt_roundps_pd (m256, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_cvt_roundps_pd (m512d, mmask8, m256, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_cvt_roundps_pd (mmask8, m256, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_cvt_roundph_ps (m256i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundph_ps (m512, mmask16, m256i, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundph_ps (mmask16, m256i, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128d = _mm_cvt_roundss_sd (m128d, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_cvt_roundss_sd (m128d, mmask8, m128d, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_cvt_roundss_sd (mmask8, m128d, m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_getexp_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_getexp_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_getexp_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_getexp_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_getexp_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_getexp_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_getexp_round_ps (m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_getexp_round_ps (m512, mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_getexp_round_ps (mmask16, m512, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_getexp_round_pd (m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_getexp_round_pd (m512d, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_getexp_round_pd (mmask8, m512d, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_getmant_round_pd (m512d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_getmant_round_pd (m512d, mmask8, m512d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_getmant_round_pd (mmask8, m512d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_getmant_round_ps (m512, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_getmant_round_ps (m512, mmask16, m512, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_getmant_round_ps (mmask16, m512, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_getmant_round_sd (m128d, m128d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_getmant_round_sd (m128d, mmask8, m128d, m128d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_getmant_round_sd (mmask8, m128d, m128d, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_getmant_round_ss (m128, m128, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_getmant_round_ss (m128, mmask8, m128, m128, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_getmant_round_ss (mmask8, m128, m128, 0, 0, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m512 = _mm512_roundscale_round_ps (m512, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_roundscale_round_ps (m512, mmask16, m512, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_roundscale_round_ps (mmask16, m512, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_roundscale_round_pd (m512d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_roundscale_round_pd (m512d, mmask8, m512d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_roundscale_round_pd (mmask8, m512d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_roundscale_round_ss (m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_roundscale_round_ss (m128, mmask8, m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_roundscale_round_ss (mmask8, m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_roundscale_round_sd (m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_roundscale_round_sd (m128d, mmask8, m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_roundscale_round_sd (mmask8, m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
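+ /* The leading immediate (4) is a valid comparison predicate; only the
+    trailing rounding operand 7 is wrong.  */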
+ mmask8 = _mm512_cmp_round_pd_mask (m512d, m512d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm512_cmp_round_ps_mask (m512, m512, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm512_mask_cmp_round_pd_mask (mmask8, m512d, m512d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm512_mask_cmp_round_ps_mask (mmask16, m512, m512, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm_cmp_round_sd_mask (m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm_mask_cmp_round_sd_mask (mmask8, m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm_cmp_round_ss_mask (m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm_mask_cmp_round_ss_mask (mmask8, m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ i = _mm_comi_round_ss (m128, m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_comi_round_sd (m128d, m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+}
+
+void
+test_round_sae (void)
+{
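+ /* 5 == _MM_FROUND_TO_NEG_INF | _MM_FROUND_CUR_DIRECTION, again an
+    invalid mix of a rounding mode with _MM_FROUND_CUR_DIRECTION.  */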
+ m128d = _mm_add_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_add_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_add_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_add_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_add_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_add_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_sub_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_sub_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_sub_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_sub_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_sub_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_sub_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_sqrt_round_pd (m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_sqrt_round_pd (m512d, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_sqrt_round_pd (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_sqrt_round_ps (m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_sqrt_round_ps (m512, mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_sqrt_round_ps (mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_sqrt_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_sqrt_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_sqrt_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_sqrt_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_sqrt_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_sqrt_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_add_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_add_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_add_round_pd (mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_add_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_add_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_add_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_sub_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_sub_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_sub_round_pd (mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_sub_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_sub_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_sub_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_mul_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_mul_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_mul_round_pd (mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mul_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_mul_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_mul_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_div_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_div_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_div_round_pd (mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_div_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_div_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_div_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mul_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_mul_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_mul_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mul_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_mul_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_mul_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_div_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_div_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_div_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_div_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_div_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_div_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_scalef_round_pd (m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_scalef_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_scalef_round_pd (mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_scalef_round_ps (m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_scalef_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_scalef_round_ps (mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_scalef_round_sd (m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_scalef_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_scalef_round_sd (mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_scalef_round_ss (m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_scalef_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_scalef_round_ss (mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_fmadd_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmadd_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmadd_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmadd_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmadd_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmadd_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmadd_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmadd_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmsub_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmsub_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmsub_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmsub_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmsub_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmsub_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmsub_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmsub_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmaddsub_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmaddsub_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmaddsub_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmaddsub_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmaddsub_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmaddsub_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmaddsub_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmaddsub_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fmsubadd_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fmsubadd_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fmsubadd_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fmsubadd_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fmsubadd_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fmsubadd_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fmsubadd_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fmsubadd_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fnmadd_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fnmadd_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fnmadd_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fnmadd_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fnmadd_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fnmadd_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fnmadd_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fnmadd_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_fnmsub_round_pd (m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fnmsub_round_pd (m512d, mmask8, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask3_fnmsub_round_pd (m512d, m512d, m512d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fnmsub_round_pd (mmask8, m512d, m512d, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fnmsub_round_ps (m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fnmsub_round_ps (m512, mmask16, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask3_fnmsub_round_ps (m512, m512, m512, mmask16, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fnmsub_round_ps (mmask16, m512, m512, m512, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m256i = _mm512_cvt_roundpd_epi32 (m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvt_roundpd_epi32 (m256i, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvt_roundpd_epi32 (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_cvt_roundpd_epu32 (m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvt_roundpd_epu32 (m256i, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvt_roundpd_epu32 (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512i = _mm512_cvt_roundps_epi32 (m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvt_roundps_epi32 (m512i, mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvt_roundps_epi32 (mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_cvt_roundps_epu32 (m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvt_roundps_epu32 (m512i, mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvt_roundps_epu32 (mmask16, m512, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_cvt_roundu32_ss (m128, 4, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundi32_ss (m128, 4, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m512 = _mm512_cvt_roundepi32_ps (m512i, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundepi32_ps (m512, mmask16, m512i, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundepi32_ps (mmask16, m512i, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_cvt_roundepu32_ps (m512i, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundepu32_ps (m512, mmask16, m512i, 5); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundepu32_ps (mmask16, m512i, 5); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvt_roundss_u32 (m128, 5); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvt_roundss_i32 (m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvt_roundsd_u32 (m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvt_roundsd_i32 (m128d, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m256 = _mm512_cvt_roundpd_ps (m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256 = _mm512_mask_cvt_roundpd_ps (m256, mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m256 = _mm512_maskz_cvt_roundpd_ps (mmask8, m512d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundsd_ss (m128, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_cvt_roundsd_ss (m128, mmask8, m128, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_cvt_roundsd_ss (mmask8, m128, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m128d = _mm_fmadd_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fmadd_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fmadd_round_sd (m128d, m128d, m128d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fmadd_round_sd (mmask8, m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fmadd_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fmadd_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fmadd_round_ss (m128, m128, m128, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fmadd_round_ss (mmask8, m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fmsub_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fmsub_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fmsub_round_sd (m128d, m128d, m128d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fmsub_round_sd (mmask8, m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fmsub_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fmsub_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fmsub_round_ss (m128, m128, m128, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fmsub_round_ss (mmask8, m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fnmadd_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fnmadd_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fnmadd_round_sd (m128d, m128d, m128d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fnmadd_round_sd (mmask8, m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fnmadd_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fnmadd_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fnmadd_round_ss (m128, m128, m128, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fnmadd_round_ss (mmask8, m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fnmsub_round_sd (m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fnmsub_round_sd (m128d, mmask8, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask3_fnmsub_round_sd (m128d, m128d, m128d, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fnmsub_round_sd (mmask8, m128d, m128d, m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fnmsub_round_ss (m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fnmsub_round_ss (m128, mmask8, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask3_fnmsub_round_ss (m128, m128, m128, mmask8, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fnmsub_round_ss (mmask8, m128, m128, m128, 5); /* { dg-error "incorrect rounding operand." } */
+}
+
+void
+test_sae_only (void)
+{
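+ /* 3 == _MM_FROUND_TO_ZERO without _MM_FROUND_NO_EXC: these intrinsics
+    only take an SAE operand (4 or 8), so a bare rounding mode must be
+    rejected.  */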
+ m512d = _mm512_max_round_pd (m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_max_round_pd (m512d, mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_max_round_pd (mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_max_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_min_round_pd (m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_min_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m256i = _mm512_cvtt_roundpd_epi32 (m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvtt_roundpd_epi32 (mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_cvtt_roundpd_epu32 (m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_mask_cvtt_roundpd_epu32 (m256i, mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m256i = _mm512_maskz_cvtt_roundpd_epu32 (mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m512i = _mm512_cvtt_roundps_epi32 (m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvtt_roundps_epi32 (m512i, mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvtt_roundps_epi32 (mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_cvtt_roundps_epu32 (m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_mask_cvtt_roundps_epu32 (m512i, mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512i = _mm512_maskz_cvtt_roundps_epu32 (mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_fixupimm_round_pd (m512d, m512d, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_fixupimm_round_pd (m512d, mmask8, m512d, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_fixupimm_round_pd (mmask8, m512d, m512d, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_fixupimm_round_ps (m512, m512, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_fixupimm_round_ps (m512, mmask16, m512, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_fixupimm_round_ps (mmask16, m512, m512, m512i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_fixupimm_round_sd (m128d, m128d, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_fixupimm_round_sd (m128d, mmask8, m128d, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_fixupimm_round_sd (mmask8, m128d, m128d, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_fixupimm_round_ss (m128, m128, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_fixupimm_round_ss (m128, mmask8, m128, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_fixupimm_round_ss (mmask8, m128, m128, m128i, 4, 3); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvtt_roundss_u32 (m128, 3); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvtt_roundss_i32 (m128, 3); /* { dg-error "incorrect rounding operand." } */
+
+ ui = _mm_cvtt_roundsd_u32 (m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_cvtt_roundsd_i32 (m128d, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m512d = _mm512_cvt_roundps_pd (m256, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_cvt_roundps_pd (m512d, mmask8, m256, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_cvt_roundps_pd (mmask8, m256, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_cvt_roundph_ps (m256i, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_cvt_roundph_ps (m512, mmask16, m256i, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_cvt_roundph_ps (mmask16, m256i, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m128d = _mm_cvt_roundss_sd (m128d, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_cvt_roundss_sd (m128d, mmask8, m128d, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_cvt_roundss_sd (mmask8, m128d, m128, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_getexp_round_ss (m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_getexp_round_ss (m128, mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_getexp_round_ss (mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_getexp_round_sd (m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_getexp_round_sd (m128d, mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_getexp_round_sd (mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_getexp_round_ps (m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_getexp_round_ps (m512, mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_getexp_round_ps (mmask16, m512, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_getexp_round_pd (m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_getexp_round_pd (m512d, mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_getexp_round_pd (mmask8, m512d, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_getmant_round_pd (m512d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_getmant_round_pd (m512d, mmask8, m512d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_getmant_round_pd (mmask8, m512d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_getmant_round_ps (m512, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_getmant_round_ps (m512, mmask16, m512, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_getmant_round_ps (mmask16, m512, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_getmant_round_sd (m128d, m128d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_getmant_round_sd (m128d, mmask8, m128d, m128d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_getmant_round_sd (mmask8, m128d, m128d, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_getmant_round_ss (m128, m128, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_getmant_round_ss (m128, mmask8, m128, m128, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_getmant_round_ss (mmask8, m128, m128, 0, 0, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m512 = _mm512_roundscale_round_ps (m512, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_mask_roundscale_round_ps (m512, mmask16, m512, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512 = _mm512_maskz_roundscale_round_ps (mmask16, m512, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_roundscale_round_pd (m512d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_mask_roundscale_round_pd (m512d, mmask8, m512d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m512d = _mm512_maskz_roundscale_round_pd (mmask8, m512d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_roundscale_round_ss (m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_roundscale_round_ss (m128, mmask8, m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_roundscale_round_ss (mmask8, m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_roundscale_round_sd (m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_roundscale_round_sd (m128d, mmask8, m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_roundscale_round_sd (mmask8, m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+
+ mmask8 = _mm512_cmp_round_pd_mask (m512d, m512d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm512_cmp_round_ps_mask (m512, m512, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm512_mask_cmp_round_pd_mask (mmask8, m512d, m512d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm512_mask_cmp_round_ps_mask (mmask16, m512, m512, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm_cmp_round_sd_mask (m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask8 = _mm_mask_cmp_round_sd_mask (mmask8, m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm_cmp_round_ss_mask (m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ mmask16 = _mm_mask_cmp_round_ss_mask (mmask8, m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 3); /* { dg-error "incorrect rounding operand." } */
+
+ i = _mm_comi_round_ss (m128, m128, 4, 3); /* { dg-error "incorrect rounding operand." } */
+ i = _mm_comi_round_sd (m128d, m128d, 4, 3); /* { dg-error "incorrect rounding operand." } */
+}
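
Note on test_sae_only: the immediate 3 is _MM_FROUND_TO_ZERO, a bare rounding mode, and the intrinsics exercised there only embed suppress-all-exceptions (SAE) control, so the expander accepts nothing but _MM_FROUND_CUR_DIRECTION (4) or _MM_FROUND_NO_EXC (8). A minimal sketch of well-formed calls, assuming only <immintrin.h> and -mavx512f:

#include <immintrin.h>

__m512d
sae_ok (__m512d a, __m512d b)
{
  /* SAE-only intrinsics take _MM_FROUND_CUR_DIRECTION (4) ...  */
  __m512d c = _mm512_max_round_pd (a, b, _MM_FROUND_CUR_DIRECTION);
  /* ... or _MM_FROUND_NO_EXC (8); a bare mode such as 3 is diagnosed.  */
  return _mm512_min_round_pd (c, b, _MM_FROUND_NO_EXC);
}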
diff --git a/gcc/testsuite/gcc.target/i386/testround-2.c b/gcc/testsuite/gcc.target/i386/testround-2.c
new file mode 100644
index 00000000000..0e7cff181ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testround-2.c
@@ -0,0 +1,57 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O0 -mavx512f" } */
+
+#include <x86intrin.h>
+
+long long l;
+unsigned long long ul;
+__m128d m128d;
+__m128 m128;
+
+void
+test_round_64 (void)
+{
+ m128d = _mm_cvt_roundu64_sd (m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_cvt_roundi64_sd (m128d, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_cvt_roundu64_ss (m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundi64_ss (m128, 4, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvt_roundss_u64 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvt_roundss_i64 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvt_roundsd_u64 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvt_roundsd_i64 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvtt_roundss_u64 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvtt_roundss_i64 (m128, 7); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvtt_roundsd_u64 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvtt_roundsd_i64 (m128d, 7); /* { dg-error "incorrect rounding operand." } */
+}
+
+void
+test_round_sae_64 (void)
+{
+ m128d = _mm_cvt_roundu64_sd (m128d, 4, 5); /* { dg-error "incorrect rounding operand." } */
+ m128d = _mm_cvt_roundi64_sd (m128d, 4, 5); /* { dg-error "incorrect rounding operand." } */
+
+ m128 = _mm_cvt_roundu64_ss (m128, 4, 5); /* { dg-error "incorrect rounding operand." } */
+ m128 = _mm_cvt_roundi64_ss (m128, 4, 5); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvt_roundss_u64 (m128, 5); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvt_roundss_i64 (m128, 5); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvt_roundsd_u64 (m128d, 5); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvt_roundsd_i64 (m128d, 5); /* { dg-error "incorrect rounding operand." } */
+}
+
+void
+test_sae_only_64 (void)
+{
+ ul = _mm_cvtt_roundss_u64 (m128, 3); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvtt_roundss_i64 (m128, 3); /* { dg-error "incorrect rounding operand." } */
+
+ ul = _mm_cvtt_roundsd_u64 (m128d, 3); /* { dg-error "incorrect rounding operand." } */
+ l = _mm_cvtt_roundsd_i64 (m128d, 3); /* { dg-error "incorrect rounding operand." } */
+}
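
These 64-bit conversions take a full embedded-rounding operand: a rounding mode OR'ed with _MM_FROUND_NO_EXC, or _MM_FROUND_CUR_DIRECTION alone. The immediates 7 (4|3) and 5 (4|1) used above mix _MM_FROUND_CUR_DIRECTION with a rounding mode, which is what the dg-error lines expect to be rejected. A sketch of a valid operand, assuming -mavx512f on an lp64 target:

#include <immintrin.h>

long long
cvt_ok (__m128d x)
{
  /* A rounding mode combined with SAE is the accepted form, e.g. 0|8.  */
  return _mm_cvt_roundsd_i64 (x, _MM_FROUND_TO_NEAREST_INT
				 | _MM_FROUND_NO_EXC);
}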
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/abi-avx512f.exp b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/abi-avx512f.exp
new file mode 100644
index 00000000000..02143bef028
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/abi-avx512f.exp
@@ -0,0 +1,61 @@
+# Copyright (C) 2009-2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# The x86-64 AVX512F ABI testsuite needs one additional assembler file for most
+# testcases. For simplicity we will just link it into each test.
+
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+ || ![is-effective-target lp64]
+ || ![is-effective-target avx512f] } then {
+ return
+}
+
+
+# If the linker used understands -M <mapfile>, pass it to clear hardware
+# capabilities set by the Sun assembler.
+set flags ""
+set clearcap_ldflags "-Wl,-M,$srcdir/gcc.target/i386/clearcap.map"
+
+if [check_no_compiler_messages mapfile executable {
+ int main (void) { return 0; }
+ } $clearcap_ldflags ] {
+ set flags $clearcap_ldflags
+}
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS
+set additional_flags "-W -Wall -mavx512f $flags"
+
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
+ if {[runtest_file_p $runtests $src]} {
+ if { ([istarget *-*-darwin*]) } then {
+ # FIXME: Darwin isn't tested.
+ c-torture-execute [list $src \
+ $srcdir/$subdir/asm-support-darwin.s] \
+ $additional_flags
+ } else {
+ c-torture-execute [list $src \
+ $srcdir/$subdir/asm-support.S] \
+ $additional_flags
+ }
+ }
+}
+
+torture-finish
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/args.h b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/args.h
new file mode 100644
index 00000000000..5e3b265ecea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/args.h
@@ -0,0 +1,184 @@
+#ifndef INCLUDED_ARGS_H
+#define INCLUDED_ARGS_H
+
+#include <immintrin.h>
+#include <string.h>
+
+/* Assertion macro. */
+#define assert(test) if (!(test)) abort()
+
+#ifdef __GNUC__
+#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+
+/* This defines the calling sequences for integers and floats. */
+#define I0 rdi
+#define I1 rsi
+#define I2 rdx
+#define I3 rcx
+#define I4 r8
+#define I5 r9
+#define F0 zmm0
+#define F1 zmm1
+#define F2 zmm2
+#define F3 zmm3
+#define F4 zmm4
+#define F5 zmm5
+#define F6 zmm6
+#define F7 zmm7
+
+typedef union {
+ float _float[16];
+ double _double[8];
+ long _long[8];
+ int _int[16];
+ unsigned long _ulong[8];
+ __m64 _m64[8];
+ __m128 _m128[4];
+ __m256 _m256[2];
+ __m512 _m512[1];
+} ZMM_T;
+
+typedef union {
+ float _float;
+ double _double;
+ long double _ldouble;
+ unsigned long _ulong[2];
+} X87_T;
+extern void (*callthis)(void);
+extern unsigned long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
+ZMM_T zmm_regs[32];
+X87_T x87_regs[8];
+extern volatile unsigned long volatile_var;
+extern void snapshot (void);
+extern void snapshot_ret (void);
+#define WRAP_CALL(N) \
+ (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
+#define WRAP_RET(N) \
+ (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
+
+/* Clear all integer registers. */
+#define clear_int_hardware_registers \
+ asm __volatile__ ("xor %%rax, %%rax\n\t" \
+ "xor %%rbx, %%rbx\n\t" \
+ "xor %%rcx, %%rcx\n\t" \
+ "xor %%rdx, %%rdx\n\t" \
+ "xor %%rsi, %%rsi\n\t" \
+ "xor %%rdi, %%rdi\n\t" \
+ "xor %%r8, %%r8\n\t" \
+ "xor %%r9, %%r9\n\t" \
+ "xor %%r10, %%r10\n\t" \
+ "xor %%r11, %%r11\n\t" \
+ "xor %%r12, %%r12\n\t" \
+ "xor %%r13, %%r13\n\t" \
+ "xor %%r14, %%r14\n\t" \
+ "xor %%r15, %%r15\n\t" \
+ ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
+ "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+
+/* This is the list of registers available for passing arguments. Not all of
+ these are used or even really available. */
+struct IntegerRegisters
+{
+ unsigned long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+};
+struct FloatRegisters
+{
+ double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
+ long double st0, st1, st2, st3, st4, st5, st6, st7;
+ ZMM_T zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7, zmm8, zmm9,
+ zmm10, zmm11, zmm12, zmm13, zmm14, zmm15, zmm16, zmm17, zmm18,
+ zmm19, zmm20, zmm21, zmm22, zmm23, zmm24, zmm25, zmm26, zmm27,
+ zmm28, zmm29, zmm30, zmm31;
+};
+
+/* Implemented in scalarargs.c */
+extern struct IntegerRegisters iregs;
+extern struct FloatRegisters fregs;
+extern unsigned int num_iregs, num_fregs;
+
+#define check_int_arguments do { \
+ assert (num_iregs <= 0 || iregs.I0 == I0); \
+ assert (num_iregs <= 1 || iregs.I1 == I1); \
+ assert (num_iregs <= 2 || iregs.I2 == I2); \
+ assert (num_iregs <= 3 || iregs.I3 == I3); \
+ assert (num_iregs <= 4 || iregs.I4 == I4); \
+ assert (num_iregs <= 5 || iregs.I5 == I5); \
+ } while (0)
+
+#define check_char_arguments check_int_arguments
+#define check_short_arguments check_int_arguments
+#define check_long_arguments check_int_arguments
+
+/* Clear register struct. */
+#define clear_struct_registers \
+ rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
+ = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
+ memset (&iregs, 0, sizeof (iregs)); \
+ memset (&fregs, 0, sizeof (fregs)); \
+ memset (zmm_regs, 0, sizeof (zmm_regs)); \
+ memset (x87_regs, 0, sizeof (x87_regs));
+
+/* Clear both hardware and register structs for integers. */
+#define clear_int_registers \
+ clear_struct_registers \
+ clear_int_hardware_registers
+
+/* Check the low element of each FP argument register.  */
+#define check_f_arguments(T) do { \
+ assert (num_fregs <= 0 || fregs.zmm0._ ## T [0] == zmm_regs[0]._ ## T [0]); \
+ assert (num_fregs <= 1 || fregs.zmm1._ ## T [0] == zmm_regs[1]._ ## T [0]); \
+ assert (num_fregs <= 2 || fregs.zmm2._ ## T [0] == zmm_regs[2]._ ## T [0]); \
+ assert (num_fregs <= 3 || fregs.zmm3._ ## T [0] == zmm_regs[3]._ ## T [0]); \
+ assert (num_fregs <= 4 || fregs.zmm4._ ## T [0] == zmm_regs[4]._ ## T [0]); \
+ assert (num_fregs <= 5 || fregs.zmm5._ ## T [0] == zmm_regs[5]._ ## T [0]); \
+ assert (num_fregs <= 6 || fregs.zmm6._ ## T [0] == zmm_regs[6]._ ## T [0]); \
+ assert (num_fregs <= 7 || fregs.zmm7._ ## T [0] == zmm_regs[7]._ ## T [0]); \
+ } while (0)
+
+#define check_float_arguments check_f_arguments(float)
+#define check_double_arguments check_f_arguments(double)
+
+#define check_vector_arguments(T,O) do { \
+ assert (num_fregs <= 0 \
+ || memcmp (((char *) &fregs.zmm0) + (O), \
+ &zmm_regs[0], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 1 \
+ || memcmp (((char *) &fregs.zmm1) + (O), \
+ &zmm_regs[1], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 2 \
+ || memcmp (((char *) &fregs.zmm2) + (O), \
+ &zmm_regs[2], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 3 \
+ || memcmp (((char *) &fregs.zmm3) + (O), \
+ &zmm_regs[3], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 4 \
+ || memcmp (((char *) &fregs.zmm4) + (O), \
+ &zmm_regs[4], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 5 \
+ || memcmp (((char *) &fregs.zmm5) + (O), \
+ &zmm_regs[5], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 6 \
+ || memcmp (((char *) &fregs.zmm6) + (O), \
+ &zmm_regs[6], \
+ sizeof (__ ## T) - (O)) == 0); \
+ assert (num_fregs <= 7 \
+ || memcmp (((char *) &fregs.zmm7) + (O), \
+ &zmm_regs[7], \
+ sizeof (__ ## T) - (O)) == 0); \
+ } while (0)
+
+#define check_m64_arguments check_vector_arguments(m64, 0)
+#define check_m128_arguments check_vector_arguments(m128, 0)
+#define check_m256_arguments check_vector_arguments(m256, 0)
+#define check_m512_arguments check_vector_arguments(m512, 0)
+
+#endif /* INCLUDED_ARGS_H */
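
WRAP_CALL and WRAP_RET are the heart of these ABI tests: they stash the callee in `callthis' and hand back `snapshot' (defined in asm-support.S below), which dumps the integer and zmm register file into the globals before tail-jumping to the real function. A hypothetical use, with `callee' standing in for any function under test:

extern void callee (int i, double d);	/* hypothetical callee */

static void
example (void)
{
  clear_struct_registers;
  num_iregs = 1;
  num_fregs = 1;
  iregs.rdi = 42;		/* expected in the first integer slot */
  fregs.zmm0._double[0] = 1.5;	/* expected in the first FP slot */
  WRAP_CALL (callee) (42, 1.5);
  check_int_arguments;
  check_double_arguments;
}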
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S
new file mode 100644
index 00000000000..e0309aeac12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S
@@ -0,0 +1,98 @@
+ .file "snapshot.S"
+ .text
+ .p2align 4,,15
+.globl snapshot
+ .type snapshot, @function
+snapshot:
+.LFB3:
+ movq %rax, rax(%rip)
+ movq %rbx, rbx(%rip)
+ movq %rcx, rcx(%rip)
+ movq %rdx, rdx(%rip)
+ movq %rdi, rdi(%rip)
+ movq %rsi, rsi(%rip)
+ movq %rbp, rbp(%rip)
+ movq %rsp, rsp(%rip)
+ movq %r8, r8(%rip)
+ movq %r9, r9(%rip)
+ movq %r10, r10(%rip)
+ movq %r11, r11(%rip)
+ movq %r12, r12(%rip)
+ movq %r13, r13(%rip)
+ movq %r14, r14(%rip)
+ movq %r15, r15(%rip)
+ vmovdqu32 %zmm0, zmm_regs+0(%rip)
+ vmovdqu32 %zmm1, zmm_regs+64(%rip)
+ vmovdqu32 %zmm2, zmm_regs+128(%rip)
+ vmovdqu32 %zmm3, zmm_regs+192(%rip)
+ vmovdqu32 %zmm4, zmm_regs+256(%rip)
+ vmovdqu32 %zmm5, zmm_regs+320(%rip)
+ vmovdqu32 %zmm6, zmm_regs+384(%rip)
+ vmovdqu32 %zmm7, zmm_regs+448(%rip)
+ vmovdqu32 %zmm8, zmm_regs+512(%rip)
+ vmovdqu32 %zmm9, zmm_regs+576(%rip)
+ vmovdqu32 %zmm10, zmm_regs+640(%rip)
+ vmovdqu32 %zmm11, zmm_regs+704(%rip)
+ vmovdqu32 %zmm12, zmm_regs+768(%rip)
+ vmovdqu32 %zmm13, zmm_regs+832(%rip)
+ vmovdqu32 %zmm14, zmm_regs+896(%rip)
+ vmovdqu32 %zmm15, zmm_regs+960(%rip)
+ vmovdqu32 %zmm16, zmm_regs+1024(%rip)
+ vmovdqu32 %zmm17, zmm_regs+1088(%rip)
+ vmovdqu32 %zmm18, zmm_regs+1152(%rip)
+ vmovdqu32 %zmm19, zmm_regs+1216(%rip)
+ vmovdqu32 %zmm20, zmm_regs+1280(%rip)
+ vmovdqu32 %zmm21, zmm_regs+1344(%rip)
+ vmovdqu32 %zmm22, zmm_regs+1408(%rip)
+ vmovdqu32 %zmm23, zmm_regs+1472(%rip)
+ vmovdqu32 %zmm24, zmm_regs+1536(%rip)
+ vmovdqu32 %zmm25, zmm_regs+1600(%rip)
+ vmovdqu32 %zmm26, zmm_regs+1664(%rip)
+ vmovdqu32 %zmm27, zmm_regs+1728(%rip)
+ vmovdqu32 %zmm28, zmm_regs+1792(%rip)
+ vmovdqu32 %zmm29, zmm_regs+1856(%rip)
+ vmovdqu32 %zmm30, zmm_regs+1920(%rip)
+ vmovdqu32 %zmm31, zmm_regs+1984(%rip)
+ jmp *callthis(%rip)
+.LFE3:
+ .size snapshot, .-snapshot
+
+ .p2align 4,,15
+.globl snapshot_ret
+ .type snapshot_ret, @function
+snapshot_ret:
+ movq %rdi, rdi(%rip)
+ subq $8, %rsp
+ call *callthis(%rip)
+ addq $8, %rsp
+ movq %rax, rax(%rip)
+ movq %rdx, rdx(%rip)
+ vmovdqu32 %zmm0, zmm_regs+0(%rip)
+ vmovdqu32 %zmm1, zmm_regs+64(%rip)
+ fstpt x87_regs(%rip)
+ fstpt x87_regs+16(%rip)
+ fldt x87_regs+16(%rip)
+ fldt x87_regs(%rip)
+ ret
+ .size snapshot_ret, .-snapshot_ret
+
+ .comm callthis,8,8
+ .comm rax,8,8
+ .comm rbx,8,8
+ .comm rcx,8,8
+ .comm rdx,8,8
+ .comm rsi,8,8
+ .comm rdi,8,8
+ .comm rsp,8,8
+ .comm rbp,8,8
+ .comm r8,8,8
+ .comm r9,8,8
+ .comm r10,8,8
+ .comm r11,8,8
+ .comm r12,8,8
+ .comm r13,8,8
+ .comm r14,8,8
+ .comm r15,8,8
+ .comm zmm_regs,2048,64
+ .comm x87_regs,128,32
+ .comm volatile_var,8,8
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/avx512f-check.h b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/avx512f-check.h
new file mode 100644
index 00000000000..25ce544c4a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/avx512f-check.h
@@ -0,0 +1,42 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "cpuid.h"
+
+static void avx512f_test (void);
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+#define DEBUG
+  /* Run the AVX512F test only if the host has AVX512F support.  */
+ if ((ecx & bit_OSXSAVE) == bit_OSXSAVE)
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((ebx & bit_AVX512F) == bit_AVX512F)
+ {
+ avx512f_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
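
The header verifies OSXSAVE plus the CPUID AVX512F bit only. A stricter harness would also ask XGETBV whether the OS actually saves opmask and ZMM state (XCR0 bits 5..7); a sketch of that extra check, offered as an assumption rather than part of the patch, for assemblers that know the xgetbv mnemonic:

static int
zmm_os_enabled (void)
{
  unsigned int eax, edx;
  /* XGETBV with ECX = 0 reads XCR0.  */
  __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
  /* XMM (bit 1), YMM (2), opmask (5), ZMM0-15 (6), ZMM16-31 (7).  */
  return (eax & 0xe6) == 0xe6;
}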
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_m512_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_m512_returning.c
new file mode 100644
index 00000000000..ee126b5510c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_m512_returning.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include "avx512f-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+__m512
+fun_test_returning___m512 (void)
+{
+ volatile_var++;
+ return (__m512){73,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+}
+
+__m512 test_512;
+
+static void
+avx512f_test (void)
+{
+ unsigned failed = 0;
+ ZMM_T zmmt1, zmmt2;
+
+ clear_struct_registers;
+ test_512 = (__m512){73,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ zmmt1._m512[0] = test_512;
+ zmmt2._m512[0] = WRAP_RET (fun_test_returning___m512)();
+ if (memcmp (&zmmt1, &zmmt2, sizeof (zmmt2)) != 0)
+ printf ("fail m512\n"), failed++;
+ if (failed)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_m512.c b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_m512.c
new file mode 100644
index 00000000000..ead9c6797e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_m512.c
@@ -0,0 +1,169 @@
+#include <stdio.h>
+#include "avx512f-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+/* This struct holds values for argument checking. */
+struct
+{
+ ZMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+ if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+ assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+void
+fun_check_passing_m512_8_values (__m512 i0 ATTRIBUTE_UNUSED, __m512 i1 ATTRIBUTE_UNUSED, __m512 i2 ATTRIBUTE_UNUSED, __m512 i3 ATTRIBUTE_UNUSED, __m512 i4 ATTRIBUTE_UNUSED, __m512 i5 ATTRIBUTE_UNUSED, __m512 i6 ATTRIBUTE_UNUSED, __m512 i7 ATTRIBUTE_UNUSED)
+{
+ /* Check argument values. */
+ compare (values.i0, i0, __m512);
+ compare (values.i1, i1, __m512);
+ compare (values.i2, i2, __m512);
+ compare (values.i3, i3, __m512);
+ compare (values.i4, i4, __m512);
+ compare (values.i5, i5, __m512);
+ compare (values.i6, i6, __m512);
+ compare (values.i7, i7, __m512);
+}
+
+void
+fun_check_passing_m512_8_regs (__m512 i0 ATTRIBUTE_UNUSED, __m512 i1 ATTRIBUTE_UNUSED, __m512 i2 ATTRIBUTE_UNUSED, __m512 i3 ATTRIBUTE_UNUSED, __m512 i4 ATTRIBUTE_UNUSED, __m512 i5 ATTRIBUTE_UNUSED, __m512 i6 ATTRIBUTE_UNUSED, __m512 i7 ATTRIBUTE_UNUSED)
+{
+ /* Check register contents. */
+ check_m512_arguments;
+}
+
+void
+fun_check_passing_m512_20_values (__m512 i0 ATTRIBUTE_UNUSED, __m512 i1 ATTRIBUTE_UNUSED, __m512 i2 ATTRIBUTE_UNUSED, __m512 i3 ATTRIBUTE_UNUSED, __m512 i4 ATTRIBUTE_UNUSED, __m512 i5 ATTRIBUTE_UNUSED, __m512 i6 ATTRIBUTE_UNUSED, __m512 i7 ATTRIBUTE_UNUSED, __m512 i8 ATTRIBUTE_UNUSED, __m512 i9 ATTRIBUTE_UNUSED, __m512 i10 ATTRIBUTE_UNUSED, __m512 i11 ATTRIBUTE_UNUSED, __m512 i12 ATTRIBUTE_UNUSED, __m512 i13 ATTRIBUTE_UNUSED, __m512 i14 ATTRIBUTE_UNUSED, __m512 i15 ATTRIBUTE_UNUSED, __m512 i16 ATTRIBUTE_UNUSED, __m512 i17 ATTRIBUTE_UNUSED, __m512 i18 ATTRIBUTE_UNUSED, __m512 i19 ATTRIBUTE_UNUSED)
+{
+ /* Check argument values. */
+ compare (values.i0, i0, __m512);
+ compare (values.i1, i1, __m512);
+ compare (values.i2, i2, __m512);
+ compare (values.i3, i3, __m512);
+ compare (values.i4, i4, __m512);
+ compare (values.i5, i5, __m512);
+ compare (values.i6, i6, __m512);
+ compare (values.i7, i7, __m512);
+ compare (values.i8, i8, __m512);
+ compare (values.i9, i9, __m512);
+ compare (values.i10, i10, __m512);
+ compare (values.i11, i11, __m512);
+ compare (values.i12, i12, __m512);
+ compare (values.i13, i13, __m512);
+ compare (values.i14, i14, __m512);
+ compare (values.i15, i15, __m512);
+ compare (values.i16, i16, __m512);
+ compare (values.i17, i17, __m512);
+ compare (values.i18, i18, __m512);
+ compare (values.i19, i19, __m512);
+}
+
+void
+fun_check_passing_m512_20_regs (__m512 i0 ATTRIBUTE_UNUSED, __m512 i1 ATTRIBUTE_UNUSED, __m512 i2 ATTRIBUTE_UNUSED, __m512 i3 ATTRIBUTE_UNUSED, __m512 i4 ATTRIBUTE_UNUSED, __m512 i5 ATTRIBUTE_UNUSED, __m512 i6 ATTRIBUTE_UNUSED, __m512 i7 ATTRIBUTE_UNUSED, __m512 i8 ATTRIBUTE_UNUSED, __m512 i9 ATTRIBUTE_UNUSED, __m512 i10 ATTRIBUTE_UNUSED, __m512 i11 ATTRIBUTE_UNUSED, __m512 i12 ATTRIBUTE_UNUSED, __m512 i13 ATTRIBUTE_UNUSED, __m512 i14 ATTRIBUTE_UNUSED, __m512 i15 ATTRIBUTE_UNUSED, __m512 i16 ATTRIBUTE_UNUSED, __m512 i17 ATTRIBUTE_UNUSED, __m512 i18 ATTRIBUTE_UNUSED, __m512 i19 ATTRIBUTE_UNUSED)
+{
+ /* Check register contents. */
+ check_m512_arguments;
+}
+
+
+#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
+ values.i0.TYPE[0] = _i0; \
+ values.i1.TYPE[0] = _i1; \
+ values.i2.TYPE[0] = _i2; \
+ values.i3.TYPE[0] = _i3; \
+ values.i4.TYPE[0] = _i4; \
+ values.i5.TYPE[0] = _i5; \
+ values.i6.TYPE[0] = _i6; \
+ values.i7.TYPE[0] = _i7; \
+ WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
+ \
+ clear_struct_registers; \
+ fregs.F0.TYPE[0] = _i0; \
+ fregs.F1.TYPE[0] = _i1; \
+ fregs.F2.TYPE[0] = _i2; \
+ fregs.F3.TYPE[0] = _i3; \
+ fregs.F4.TYPE[0] = _i4; \
+ fregs.F5.TYPE[0] = _i5; \
+ fregs.F6.TYPE[0] = _i6; \
+ fregs.F7.TYPE[0] = _i7; \
+ num_fregs = 8; \
+ WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
+
+#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19, _func1, _func2, TYPE) \
+ values.i0.TYPE[0] = _i0; \
+ values.i1.TYPE[0] = _i1; \
+ values.i2.TYPE[0] = _i2; \
+ values.i3.TYPE[0] = _i3; \
+ values.i4.TYPE[0] = _i4; \
+ values.i5.TYPE[0] = _i5; \
+ values.i6.TYPE[0] = _i6; \
+ values.i7.TYPE[0] = _i7; \
+ values.i8.TYPE[0] = _i8; \
+ values.i9.TYPE[0] = _i9; \
+ values.i10.TYPE[0] = _i10; \
+ values.i11.TYPE[0] = _i11; \
+ values.i12.TYPE[0] = _i12; \
+ values.i13.TYPE[0] = _i13; \
+ values.i14.TYPE[0] = _i14; \
+ values.i15.TYPE[0] = _i15; \
+ values.i16.TYPE[0] = _i16; \
+ values.i17.TYPE[0] = _i17; \
+ values.i18.TYPE[0] = _i18; \
+ values.i19.TYPE[0] = _i19; \
+ WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19); \
+ \
+ clear_struct_registers; \
+ fregs.F0.TYPE[0] = _i0; \
+ fregs.F1.TYPE[0] = _i1; \
+ fregs.F2.TYPE[0] = _i2; \
+ fregs.F3.TYPE[0] = _i3; \
+ fregs.F4.TYPE[0] = _i4; \
+ fregs.F5.TYPE[0] = _i5; \
+ fregs.F6.TYPE[0] = _i6; \
+ fregs.F7.TYPE[0] = _i7; \
+ num_fregs = 8; \
+ WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19);
+
+void
+test_m512_on_stack ()
+{
+ __m512 x[8];
+ int i;
+ for (i = 0; i < 8; i++)
+ x[i] = (__m512){32+i, 0, 0, 0, 0, 0, 0, 0};
+ pass = "m512-8";
+ def_check_passing8(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fun_check_passing_m512_8_values, fun_check_passing_m512_8_regs, _m512);
+}
+
+void
+test_too_many_m512 ()
+{
+ __m512 x[20];
+ int i;
+ for (i = 0; i < 20; i++)
+ x[i] = (__m512){32+i, 0, 0, 0, 0, 0, 0, 0};
+ pass = "m512-20";
+ def_check_passing20(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16], x[17], x[18], x[19], fun_check_passing_m512_20_values, fun_check_passing_m512_20_regs, _m512);
+}
+
+static void
+avx512f_test (void)
+{
+ test_m512_on_stack ();
+ test_too_many_m512 ();
+ if (failed)
+ abort ();
+}
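
In psABI terms the two passes pin down one rule: the first eight __m512 arguments are assigned to zmm0..zmm7 and everything beyond them is passed in memory, which is why only num_fregs = 8 registers are ever compared even in the 20-argument case. A prototype-level sketch of that assignment (our reading of the convention, not text from the patch):

void
nine_zmm_args (__m512 a0, __m512 a1, __m512 a2, __m512 a3,	/* zmm0..zmm3 */
	       __m512 a4, __m512 a5, __m512 a6, __m512 a7,	/* zmm4..zmm7 */
	       __m512 a8);					/* stack */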
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_structs.c
new file mode 100644
index 00000000000..355b5730b7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_structs.c
@@ -0,0 +1,64 @@
+#include "avx512f-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+struct m512_struct
+{
+ __m512 x;
+};
+
+struct m512_2_struct
+{
+ __m512 x1, x2;
+};
+
+/* Check that the struct is passed as the individual members in fregs. */
+void
+check_struct_passing1 (struct m512_struct ms1 ATTRIBUTE_UNUSED,
+ struct m512_struct ms2 ATTRIBUTE_UNUSED,
+ struct m512_struct ms3 ATTRIBUTE_UNUSED,
+ struct m512_struct ms4 ATTRIBUTE_UNUSED,
+ struct m512_struct ms5 ATTRIBUTE_UNUSED,
+ struct m512_struct ms6 ATTRIBUTE_UNUSED,
+ struct m512_struct ms7 ATTRIBUTE_UNUSED,
+ struct m512_struct ms8 ATTRIBUTE_UNUSED)
+{
+ check_m512_arguments;
+}
+
+void
+check_struct_passing2 (struct m512_2_struct ms ATTRIBUTE_UNUSED)
+{
+ /* Check the passing on the stack by comparing the address of the
+ stack elements to the expected place on the stack. */
+ assert ((unsigned long)&ms.x1 == rsp+8);
+ assert ((unsigned long)&ms.x2 == rsp+72);
+}
+
+static void
+avx512f_test (void)
+{
+ struct m512_struct m512s [8];
+ struct m512_2_struct m512_2s = {
+ { 48.394, 39.3, -397.9, 3484.9, -8.394, -93.3, 7.9, 84.94,
+ 48.3941, 39.31, -397.91, 3484.91, -8.3941, -93.31, 7.91, 84.941 },
+ { -8.394, -3.3, -39.9, 34.9, 7.9, 84.94, -48.394, 39.3,
+ -8.3942, -3.32, -39.92, 34.92, 7.92, 84.942, -48.3942, 39.32 }
+ };
+ int i;
+
+ for (i = 0; i < 8; i++)
+ m512s[i].x = (__m512){32+i, 0, i, 0, -i, 0, i - 12, i + 8,
+ 32+i, 0, i, 0, -i, 0, i - 12, i + 8};
+
+ clear_struct_registers;
+ for (i = 0; i < 8; i++)
+ (&fregs.zmm0)[i]._m512[0] = m512s[i].x;
+ num_fregs = 8;
+ WRAP_CALL (check_struct_passing1)(m512s[0], m512s[1], m512s[2], m512s[3],
+ m512s[4], m512s[5], m512s[6], m512s[7]);
+ WRAP_CALL (check_struct_passing2)(m512_2s);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_unions.c
new file mode 100644
index 00000000000..2a7669a77b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/test_passing_unions.c
@@ -0,0 +1,176 @@
+#include "avx512f-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+union un1
+{
+ __m512 x;
+ float f;
+};
+
+union un2
+{
+ __m512 x;
+ double d;
+};
+
+union un3
+{
+ __m512 x;
+ __m128 v;
+};
+
+union un4
+{
+ __m512 x;
+ long double ld;
+};
+
+union un5
+{
+ __m512 x;
+ int i;
+};
+
+union un6
+{
+ __m512 x;
+ __m256 v;
+};
+
+
+void
+check_union_passing1(union un1 u1 ATTRIBUTE_UNUSED,
+ union un1 u2 ATTRIBUTE_UNUSED,
+ union un1 u3 ATTRIBUTE_UNUSED,
+ union un1 u4 ATTRIBUTE_UNUSED,
+ union un1 u5 ATTRIBUTE_UNUSED,
+ union un1 u6 ATTRIBUTE_UNUSED,
+ union un1 u7 ATTRIBUTE_UNUSED,
+ union un1 u8 ATTRIBUTE_UNUSED)
+{
+ check_m512_arguments;
+}
+
+void
+check_union_passing2(union un2 u1 ATTRIBUTE_UNUSED,
+ union un2 u2 ATTRIBUTE_UNUSED,
+ union un2 u3 ATTRIBUTE_UNUSED,
+ union un2 u4 ATTRIBUTE_UNUSED,
+ union un2 u5 ATTRIBUTE_UNUSED,
+ union un2 u6 ATTRIBUTE_UNUSED,
+ union un2 u7 ATTRIBUTE_UNUSED,
+ union un2 u8 ATTRIBUTE_UNUSED)
+{
+ check_m512_arguments;
+}
+
+void
+check_union_passing3(union un3 u1 ATTRIBUTE_UNUSED,
+ union un3 u2 ATTRIBUTE_UNUSED,
+ union un3 u3 ATTRIBUTE_UNUSED,
+ union un3 u4 ATTRIBUTE_UNUSED,
+ union un3 u5 ATTRIBUTE_UNUSED,
+ union un3 u6 ATTRIBUTE_UNUSED,
+ union un3 u7 ATTRIBUTE_UNUSED,
+ union un3 u8 ATTRIBUTE_UNUSED)
+{
+ check_m512_arguments;
+}
+
+void
+check_union_passing4(union un4 u ATTRIBUTE_UNUSED)
+{
+ /* Check the passing on the stack by comparing the address of the
+ stack elements to the expected place on the stack. */
+ assert ((unsigned long)&u.x == rsp+8);
+ assert ((unsigned long)&u.ld == rsp+8);
+}
+
+void
+check_union_passing5(union un5 u ATTRIBUTE_UNUSED)
+{
+ /* Check the passing on the stack by comparing the address of the
+ stack elements to the expected place on the stack. */
+ assert ((unsigned long)&u.x == rsp+8);
+ assert ((unsigned long)&u.i == rsp+8);
+}
+
+void
+check_union_passing6(union un6 u1 ATTRIBUTE_UNUSED,
+ union un6 u2 ATTRIBUTE_UNUSED,
+ union un6 u3 ATTRIBUTE_UNUSED,
+ union un6 u4 ATTRIBUTE_UNUSED,
+ union un6 u5 ATTRIBUTE_UNUSED,
+ union un6 u6 ATTRIBUTE_UNUSED,
+ union un6 u7 ATTRIBUTE_UNUSED,
+ union un6 u8 ATTRIBUTE_UNUSED)
+{
+ check_m512_arguments;
+}
+
+#define check_union_passing1 WRAP_CALL(check_union_passing1)
+#define check_union_passing2 WRAP_CALL(check_union_passing2)
+#define check_union_passing3 WRAP_CALL(check_union_passing3)
+#define check_union_passing4 WRAP_CALL(check_union_passing4)
+#define check_union_passing5 WRAP_CALL(check_union_passing5)
+#define check_union_passing6 WRAP_CALL(check_union_passing6)
+
+static void
+avx512f_test (void)
+{
+ union un1 u1[8];
+ union un2 u2[8];
+ union un3 u3[8];
+ union un4 u4;
+ union un5 u5;
+ union un6 u6[8];
+ int i;
+
+ for (i = 0; i < 8; i++)
+ u1[i].x = (__m512){32+i, 0, i, 0, -i, 0, i - 12, i + 8,
+ 32+i, 0, i, 0, -i, 0, i - 12, i + 8};
+
+ clear_struct_registers;
+ for (i = 0; i < 8; i++)
+ (&fregs.zmm0)[i]._m512[0] = u1[i].x;
+ num_fregs = 8;
+ check_union_passing1(u1[0], u1[1], u1[2], u1[3],
+ u1[4], u1[5], u1[6], u1[7]);
+
+ clear_struct_registers;
+ for (i = 0; i < 8; i++)
+ {
+ u2[i].x = u1[i].x;
+ (&fregs.zmm0)[i]._m512[0] = u2[i].x;
+ }
+ num_fregs = 8;
+ check_union_passing2(u2[0], u2[1], u2[2], u2[3],
+ u2[4], u2[5], u2[6], u2[7]);
+
+ clear_struct_registers;
+ for (i = 0; i < 8; i++)
+ {
+ u3[i].x = u1[i].x;
+ (&fregs.zmm0)[i]._m512[0] = u3[i].x;
+ }
+ num_fregs = 8;
+ check_union_passing3(u3[0], u3[1], u3[2], u3[3],
+ u3[4], u3[5], u3[6], u3[7]);
+
+ check_union_passing4(u4);
+ check_union_passing5(u5);
+
+ clear_struct_registers;
+ for (i = 0; i < 8; i++)
+ {
+ u6[i].x = u1[i].x;
+ (&fregs.zmm0)[i]._m512[0] = u6[i].x;
+ }
+ num_fregs = 8;
+ check_union_passing6(u6[0], u6[1], u6[2], u6[3],
+ u6[4], u6[5], u6[6], u6[7]);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index fe51fde52d3..1dafe56b800 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5018,6 +5018,19 @@ proc check_prefer_avx128 { } {
}
+# Return 1 if avx512f instructions can be compiled.
+
+proc check_effective_target_avx512f { } {
+ return [check_no_compiler_messages avx512f object {
+ typedef double __m512d __attribute__ ((__vector_size__ (64)));
+
+ void _mm512_add (__m512d a)
+ {
+ __builtin_ia32_addpd512_mask (a, a, a, 1, 4);
+ }
+ } "-O2 -mavx512f" ]
+}
+
# Return 1 if avx instructions can be compiled.
proc check_effective_target_avx { } {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 0580f7dfadc..5433077691b 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4509,7 +4509,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
- tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
+ tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
@@ -4551,8 +4551,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
scaletype = TREE_VALUE (arglist);
- gcc_checking_assert (types_compatible_p (srctype, rettype)
- && types_compatible_p (srctype, masktype));
+ gcc_checking_assert (types_compatible_p (srctype, rettype));
vec_dest = vect_create_destination_var (scalar_dest, vectype);
@@ -4566,8 +4565,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* Currently we support only unconditional gather loads,
so mask should be all ones. */
- if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
- mask = build_int_cst (TREE_TYPE (masktype), -1);
+ if (TREE_CODE (masktype) == INTEGER_TYPE)
+ mask = build_int_cst (masktype, -1);
+ else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+ {
+ mask = build_int_cst (TREE_TYPE (masktype), -1);
+ mask = build_vector_from_val (masktype, mask);
+ }
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
{
REAL_VALUE_TYPE r;
@@ -4576,14 +4580,30 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tmp[j] = -1;
real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
mask = build_real (TREE_TYPE (masktype), r);
+ mask = build_vector_from_val (masktype, mask);
}
else
gcc_unreachable ();
- mask = build_vector_from_val (masktype, mask);
mask = vect_init_vector (stmt, mask, masktype, NULL);
scale = build_int_cst (scaletype, gather_scale);
+ if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
+ merge = build_int_cst (TREE_TYPE (rettype), 0);
+ else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
+ {
+ REAL_VALUE_TYPE r;
+ long tmp[6];
+ for (j = 0; j < 6; ++j)
+ tmp[j] = 0;
+ real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
+ merge = build_real (TREE_TYPE (rettype), r);
+ }
+ else
+ gcc_unreachable ();
+ merge = build_vector_from_val (rettype, merge);
+ merge = vect_init_vector (stmt, merge, rettype, NULL);
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; ++j)
{
@@ -4612,7 +4632,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
new_stmt
- = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
+ = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
if (!useless_type_conversion_p (vectype, rettype))
{
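
The substance of these hunks: the AVX-512 gather builtins carry a real pass-through operand, so the vectorizer now builds a dedicated zero `merge' vector instead of recycling the all-ones mask as the source, and masktype may be a scalar integer (an opmask) rather than a vector. As an element-wise C model of the five-operand call (an illustration of the semantics, not GCC's IR):

static void
gather_model (double *res, const char *base, const long *idx,
	      const char *mask, const double *merge, int scale, int n)
{
  for (int i = 0; i < n; i++)
    res[i] = mask[i] ? *(const double *) (base + idx[i] * (long) scale)
		     : merge[i];
}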
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7c5dfe884df..e840456dda5 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -631,8 +631,8 @@ typedef struct _stmt_vec_info {
conversion. */
#define MAX_INTERM_CVT_STEPS 3
-/* The maximum vectorization factor supported by any target (V32QI). */
-#define MAX_VECTORIZATION_FACTOR 32
+/* The maximum vectorization factor supported by any target (V64QI). */
+#define MAX_VECTORIZATION_FACTOR 64
/* Avoid GTY(()) on stmt_vec_info. */
typedef void *vec_void_p;
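
The bump is forced by the widest mode the port now defines: a 512-bit zmm register holds 64 QImode elements.

/* 512-bit vector / 8-bit element gives vectorization factor 64 (V64QI),
   up from 32 (V32QI) with 256-bit ymm registers.  */
enum { ZMM_BITS = 512, QI_BITS = 8, MAX_VF = ZMM_BITS / QI_BITS };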